From ac72641a1d17fc90f10b4db5f15407f9e4c0688e Mon Sep 17 00:00:00 2001 From: NianJiuZst <3235467914@qq.com> Date: Wed, 13 May 2026 02:52:38 +0800 Subject: [PATCH] feat: add audio file save support to SpeechSDK and MusicSDK Add `save()` methods that decode hex-encoded audio from API responses and write it to disk, matching the CLI's audio output behavior: SpeechSDK.save(response, outPath?, ext?) - Decodes hex audio data from synthesize() responses - Validates hex format before decoding - Creates intermediate directories as needed - Generates timestamp-based default filenames (speech_<timestamp>.mp3) - Handles disk-full (ENOSPC) errors gracefully MusicSDK.save(response, outPath?, ext?) - Same hex-decode logic for generate() responses - Default filename prefix: music_<timestamp>.mp3 - Same validation and error handling Co-Authored-By: Claude Opus 4.6 --- src/sdk/music/index.ts | 49 ++++++++++++++++++++++++++++++++++++++ src/sdk/speech/index.ts | 49 ++++++++++++++++++++++++++++++++++++++ test/sdk/music.test.ts | 52 +++++++++++++++++++++++++++++++++++++++++ test/sdk/speech.test.ts | 52 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 202 insertions(+) diff --git a/src/sdk/music/index.ts b/src/sdk/music/index.ts index 37c4b14..090b837 100644 --- a/src/sdk/music/index.ts +++ b/src/sdk/music/index.ts @@ -1,3 +1,5 @@ +import { existsSync, mkdirSync, writeFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; import { Client } from "../client"; import { musicEndpoint } from "../../client/endpoints"; import { MusicRequest, MusicResponse } from "../../types/api"; @@ -7,6 +9,21 @@ import { ExitCode } from "../../errors/codes"; import { toMerged } from "es-toolkit/object"; import { musicGenerateModel } from "../../commands/music/models"; +function hexToBuffer(hex: string): Buffer { + if (!/^[0-9a-fA-F]*$/.test(hex)) { + throw new SDKError('API returned invalid audio data (not valid hex).', ExitCode.GENERAL); + } + if (hex.length % 2 !== 0) { + throw new SDKError('API 
returned truncated audio data (odd-length hex string).', ExitCode.GENERAL); + } + return Buffer.from(hex, 'hex'); +} + +function defaultFilename(prefix: string, ext: string): string { + const ts = new Date().toISOString().slice(0, 19).replace(/[T:]/g, '-'); + return `${prefix}_${ts}.${ext}`; +} + export interface MusicGenerateRequest extends MusicRequest { /** Vocal style, e.g. "warm male baritone", "bright female soprano", "duet with harmonies" */ vocals?: string; @@ -81,6 +98,38 @@ export class MusicSDK extends Client { }); } + /** + * Save generated music audio to a file. Decodes the hex-encoded audio + * from the API response and writes it to disk. Creates intermediate + * directories as needed. + * + * @param response — The response from `generate()`. + * @param outPath — Target file path. Defaults to `music_<timestamp>.mp3`. + * @param ext — File extension for the default filename (default: `"mp3"`); ignored when `outPath` is provided. + * @returns The absolute path of the saved file. + */ + save(response: MusicResponse, outPath?: string, ext = 'mp3'): string { + const dest = resolve(outPath || defaultFilename('music', ext)); + const audioHex = response.data.audio; + if (!audioHex) { + throw new SDKError('API response missing audio data.', ExitCode.GENERAL); + } + + const dir = dirname(dest); + if (!existsSync(dir)) mkdirSync(dir, { recursive: true }); + + try { + writeFileSync(dest, hexToBuffer(audioHex)); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOSPC') { + throw new SDKError('Disk full — cannot write audio file.', ExitCode.GENERAL); + } + throw err; + } + + return dest; + } + private buildPrompt(request: ModelPartial) { const structuredParts: string[] = []; if (request.vocals) structuredParts.push(`Vocals: ${request.vocals as string}`); diff --git a/src/sdk/speech/index.ts b/src/sdk/speech/index.ts index 91d2f59..5c4844c 100644 --- a/src/sdk/speech/index.ts +++ b/src/sdk/speech/index.ts @@ -1,3 +1,5 @@ +import { existsSync, mkdirSync, writeFileSync } from 'node:fs'; +import { resolve, dirname } from 
'node:path'; import { Client } from "../client"; import { speechEndpoint, voicesEndpoint } from "../../client/endpoints"; import { SpeechRequest, SpeechResponse, VoiceListResponse } from "../../types/api"; @@ -7,6 +9,21 @@ import { ExitCode } from "../../errors/codes"; import { toMerged } from "es-toolkit/object"; import { ModelPartial } from "../types"; +function hexToBuffer(hex: string): Buffer { + if (!/^[0-9a-fA-F]*$/.test(hex)) { + throw new SDKError('API returned invalid audio data (not valid hex).', ExitCode.GENERAL); + } + if (hex.length % 2 !== 0) { + throw new SDKError('API returned truncated audio data (odd-length hex string).', ExitCode.GENERAL); + } + return Buffer.from(hex, 'hex'); +} + +function defaultFilename(prefix: string, ext: string): string { + const ts = new Date().toISOString().slice(0, 19).replace(/[T:]/g, '-'); + return `${prefix}_${ts}.${ext}`; +} + export class SpeechSDK extends Client { async synthesize(request: ModelPartial & { stream: true }): Promise>; async synthesize(request: ModelPartial): Promise; @@ -56,6 +73,38 @@ export class SpeechSDK extends Client { return voices; } + /** + * Save synthesized speech audio to a file. Decodes the hex-encoded audio + * from the API response and writes it to disk. Creates intermediate + * directories as needed. + * + * @param response — The response from `synthesize()`. + * @param outPath — Target file path. Defaults to `speech_<timestamp>.mp3`. + * @param ext — File extension for the default filename (default: `"mp3"`); ignored when `outPath` is provided. + * @returns The absolute path of the saved file. 
+ */ + save(response: SpeechResponse, outPath?: string, ext = 'mp3'): string { + const dest = resolve(outPath || defaultFilename('speech', ext)); + const audioHex = response.data.audio; + if (!audioHex) { + throw new SDKError('API response missing audio data.', ExitCode.GENERAL); + } + + const dir = dirname(dest); + if (!existsSync(dir)) mkdirSync(dir, { recursive: true }); + + try { + writeFileSync(dest, hexToBuffer(audioHex)); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOSPC') { + throw new SDKError('Disk full — cannot write audio file.', ExitCode.GENERAL); + } + throw err; + } + + return dest; + } + private validateParams(params: Partial): SpeechRequest { if (!params.text) { throw new SDKError('text is required', ExitCode.USAGE); diff --git a/test/sdk/music.test.ts b/test/sdk/music.test.ts index 4b822e0..8a72b40 100644 --- a/test/sdk/music.test.ts +++ b/test/sdk/music.test.ts @@ -1,6 +1,21 @@ import { describe, it, expect, afterEach } from 'bun:test'; import { createMockServer, jsonResponse, type MockServer } from '../helpers/mock-server'; import { MiniMaxSDK } from '../../src/sdk'; +import { MusicSDK } from '../../src/sdk/music'; +import { existsSync, unlinkSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import type { MusicResponse } from '../../src/types/api'; + +function makeMusicResponse(hexAudio?: string): MusicResponse { + return { + base_resp: { status_code: 0, status_msg: 'ok' }, + data: { + audio: hexAudio || Buffer.from('hello music audio').toString('hex'), + status: 0, + }, + }; +} describe('MiniMaxSDK.music', () => { let server: MockServer; @@ -32,3 +47,40 @@ describe('MiniMaxSDK.music', () => { expect(result.data.audio_url).toBe('https://example.com/music.mp3'); }); }); + +describe('MusicSDK.save', () => { + const sdk = new MusicSDK({ apiKey: 'sk-test', region: 'global' }); + + it('decodes hex audio and saves to disk', () => { + const out = join(tmpdir(), 
`music-sdk-save-${Date.now()}.mp3`); + const response = makeMusicResponse(); + + const saved = sdk.save(response, out); + expect(saved).toBe(out); + expect(existsSync(out)).toBe(true); + expect(readFileSync(out).toString()).toBe('hello music audio'); + unlinkSync(out); + }); + + it('generates default filename with timestamp', () => { + const response = makeMusicResponse(); + const saved = sdk.save(response); + expect(saved).toMatch(/music_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.mp3/); + expect(existsSync(saved)).toBe(true); + unlinkSync(saved); + }); + + it('creates intermediate directories', () => { + const out = join(tmpdir(), `music-sdk-deep-${Date.now()}`, 'x', 'y', 'song.wav'); + const response = makeMusicResponse(); + const saved = sdk.save(response, out, 'wav'); + expect(existsSync(saved)).toBe(true); + unlinkSync(saved); + }); + + it('throws when audio data is missing', () => { + const response = makeMusicResponse(''); + response.data.audio = undefined; + expect(() => sdk.save(response, '/tmp/test.mp3')).toThrow('missing audio data'); + }); +}); diff --git a/test/sdk/speech.test.ts b/test/sdk/speech.test.ts index 7a29309..772ae7e 100644 --- a/test/sdk/speech.test.ts +++ b/test/sdk/speech.test.ts @@ -1,6 +1,21 @@ import { describe, it, expect, afterEach } from 'bun:test'; import { createMockServer, jsonResponse, type MockServer } from '../helpers/mock-server'; import { MiniMaxSDK } from '../../src/sdk'; +import { SpeechSDK } from '../../src/sdk/speech'; +import { existsSync, unlinkSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import type { SpeechResponse } from '../../src/types/api'; + +function makeSpeechResponse(hexAudio?: string): SpeechResponse { + return { + base_resp: { status_code: 0, status_msg: 'ok' }, + data: { + audio: hexAudio || Buffer.from('hello speech audio').toString('hex'), + status: 0, + }, + }; +} describe('MiniMaxSDK.speech', () => { let server: MockServer; @@ -54,3 +69,40 @@ 
describe('MiniMaxSDK.speech', () => { expect(voices[0].voice_id).toBe('voice-1'); }); }); + +describe('SpeechSDK.save', () => { + const sdk = new SpeechSDK({ apiKey: 'sk-test', region: 'global' }); + + it('decodes hex audio and saves to disk', () => { + const out = join(tmpdir(), `speech-sdk-save-${Date.now()}.mp3`); + const response = makeSpeechResponse(); + + const saved = sdk.save(response, out); + expect(saved).toBe(out); + expect(existsSync(out)).toBe(true); + expect(readFileSync(out).toString()).toBe('hello speech audio'); + unlinkSync(out); + }); + + it('generates default filename with timestamp', () => { + const response = makeSpeechResponse(); + const saved = sdk.save(response); + expect(saved).toMatch(/speech_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.mp3/); + expect(existsSync(saved)).toBe(true); + unlinkSync(saved); + }); + + it('creates intermediate directories', () => { + const out = join(tmpdir(), `speech-sdk-deep-${Date.now()}`, 'a', 'b', 'out.wav'); + const response = makeSpeechResponse(); + const saved = sdk.save(response, out, 'wav'); + expect(existsSync(saved)).toBe(true); + unlinkSync(saved); + }); + + it('throws when audio data is missing', () => { + const response = makeSpeechResponse(''); + response.data.audio = undefined; + expect(() => sdk.save(response, '/tmp/test.mp3')).toThrow('missing audio data'); + }); +});