From ac72641a1d17fc90f10b4db5f15407f9e4c0688e Mon Sep 17 00:00:00 2001
From: NianJiuZst <3235467914@qq.com>
Date: Wed, 13 May 2026 02:52:38 +0800
Subject: [PATCH] feat: add audio file save support to SpeechSDK and MusicSDK

Add `save()` methods that decode hex-encoded audio from API responses
and write it to disk, matching the CLI's audio output behavior:

SpeechSDK.save(response, outPath?, ext?)
- Decodes hex audio data from synthesize() responses
- Validates hex format before decoding
- Creates intermediate directories as needed
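- Generates a UTC-timestamped default filename (speech_<ts>.<ext>) when
  outPath is omitted; ext is ignored when outPath is given
- Rethrows disk-full (ENOSPC) write errors as an SDKError; other write
  errors propagate unchanged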

MusicSDK.save(response, outPath?, ext?)
- Same hex-decode logic for generate() responses
- Default filename: music_<ts>.<ext> (ext defaults to mp3)
- Same validation and error handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/sdk/music/index.ts  | 49 ++++++++++++++++++++++++++++++++++++++
 src/sdk/speech/index.ts | 49 ++++++++++++++++++++++++++++++++++++++
 test/sdk/music.test.ts  | 52 +++++++++++++++++++++++++++++++++++++++++
 test/sdk/speech.test.ts | 52 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 202 insertions(+)
diff --git a/src/sdk/music/index.ts b/src/sdk/music/index.ts
index 37c4b14..090b837 100644
--- a/src/sdk/music/index.ts
+++ b/src/sdk/music/index.ts
@@ -1,3 +1,5 @@
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import { resolve, dirname } from 'node:path';
 import { Client } from "../client";
 import { musicEndpoint } from "../../client/endpoints";
 import { MusicRequest, MusicResponse } from "../../types/api";
@@ -7,6 +9,21 @@ import { ExitCode } from "../../errors/codes";
 import { toMerged } from "es-toolkit/object";
 import { musicGenerateModel } from "../../commands/music/models";
 
+function hexToBuffer(hex: string): Buffer {
+  if (!/^[0-9a-fA-F]*$/.test(hex)) {
+    throw new SDKError('API returned invalid audio data (not valid hex).', ExitCode.GENERAL);
+  }
+  if (hex.length % 2 !== 0) {
+    throw new SDKError('API returned truncated audio data (odd-length hex string).', ExitCode.GENERAL);
+  }
+  return Buffer.from(hex, 'hex');
+}
+
+function defaultFilename(prefix: string, ext: string): string {
+  const ts = new Date().toISOString().slice(0, 19).replace(/[T:]/g, '-');
+  return `${prefix}_${ts}.${ext}`;
+}
+
 export interface MusicGenerateRequest extends MusicRequest {
   /** Vocal style, e.g. "warm male baritone", "bright female soprano", "duet with harmonies" */
   vocals?: string;
@@ -81,6 +98,38 @@ export class MusicSDK extends Client {
     });
   }
 
+  /**
+   * Save generated music audio to a file. Decodes the hex-encoded audio
+   * from the API response and writes it to disk. Creates intermediate
+   * directories as needed.
+   *
+   * @param response — The response from `generate()`.
+   * @param outPath  — Target file path. Defaults to `music_<timestamp>.<ext>` in the current working directory.
+   * @param ext      — Extension used only for the default filename (default: `"mp3"`); ignored when `outPath` is given.
+   * @returns The absolute path of the saved file.
+   */
+  save(response: MusicResponse, outPath?: string, ext = 'mp3'): string {
+    const dest = resolve(outPath || defaultFilename('music', ext));
+    const audioHex = response.data.audio;
+    if (!audioHex) {
+      throw new SDKError('API response missing audio data.', ExitCode.GENERAL);
+    }
+
+    const dir = dirname(dest);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+
+    try {
+      writeFileSync(dest, hexToBuffer(audioHex));
+    } catch (err) {
+      if ((err as NodeJS.ErrnoException).code === 'ENOSPC') {
+        throw new SDKError('Disk full — cannot write audio file.', ExitCode.GENERAL);
+      }
+      throw err;
+    }
+
+    return dest;
+  }
+
   private buildPrompt(request: ModelPartial<MusicGenerateRequest>) {
     const structuredParts: string[] = [];
     if (request.vocals)      structuredParts.push(`Vocals: ${request.vocals as string}`);
diff --git a/src/sdk/speech/index.ts b/src/sdk/speech/index.ts
index 91d2f59..5c4844c 100644
--- a/src/sdk/speech/index.ts
+++ b/src/sdk/speech/index.ts
@@ -1,3 +1,5 @@
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import { resolve, dirname } from 'node:path';
 import { Client } from "../client";
 import { speechEndpoint, voicesEndpoint } from "../../client/endpoints";
 import { SpeechRequest, SpeechResponse, VoiceListResponse } from "../../types/api";
@@ -7,6 +9,21 @@ import { ExitCode } from "../../errors/codes";
 import { toMerged } from "es-toolkit/object";
 import { ModelPartial } from "../types";
 
+function hexToBuffer(hex: string): Buffer {
+  if (!/^[0-9a-fA-F]*$/.test(hex)) {
+    throw new SDKError('API returned invalid audio data (not valid hex).', ExitCode.GENERAL);
+  }
+  if (hex.length % 2 !== 0) {
+    throw new SDKError('API returned truncated audio data (odd-length hex string).', ExitCode.GENERAL);
+  }
+  return Buffer.from(hex, 'hex');
+}
+
+function defaultFilename(prefix: string, ext: string): string {
+  const ts = new Date().toISOString().slice(0, 19).replace(/[T:]/g, '-');
+  return `${prefix}_${ts}.${ext}`;
+}
+
 export class SpeechSDK extends Client {
   async synthesize(request: ModelPartial<SpeechRequest> & { stream: true }): Promise<AsyncGenerator<SpeechResponse>>;
   async synthesize(request: ModelPartial<SpeechRequest>): Promise<SpeechResponse>;
@@ -56,6 +73,38 @@ export class SpeechSDK extends Client {
     return voices;
   }
 
+  /**
+   * Save synthesized speech audio to a file. Decodes the hex-encoded audio
+   * from the API response and writes it to disk. Creates intermediate
+   * directories as needed.
+   *
+   * @param response — The response from `synthesize()`.
+   * @param outPath  — Target file path. Defaults to `speech_<timestamp>.<ext>` in the current working directory.
+   * @param ext      — Extension used only for the default filename (default: `"mp3"`); ignored when `outPath` is given.
+   * @returns The absolute path of the saved file.
+   */
+  save(response: SpeechResponse, outPath?: string, ext = 'mp3'): string {
+    const dest = resolve(outPath || defaultFilename('speech', ext));
+    const audioHex = response.data.audio;
+    if (!audioHex) {
+      throw new SDKError('API response missing audio data.', ExitCode.GENERAL);
+    }
+
+    const dir = dirname(dest);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+
+    try {
+      writeFileSync(dest, hexToBuffer(audioHex));
+    } catch (err) {
+      if ((err as NodeJS.ErrnoException).code === 'ENOSPC') {
+        throw new SDKError('Disk full — cannot write audio file.', ExitCode.GENERAL);
+      }
+      throw err;
+    }
+
+    return dest;
+  }
+
   private validateParams(params: Partial<SpeechRequest>): SpeechRequest {
     if (!params.text) {
       throw new SDKError('text is required', ExitCode.USAGE);
diff --git a/test/sdk/music.test.ts b/test/sdk/music.test.ts
index 4b822e0..8a72b40 100644
--- a/test/sdk/music.test.ts
+++ b/test/sdk/music.test.ts
@@ -1,6 +1,21 @@
 import { describe, it, expect, afterEach } from 'bun:test';
 import { createMockServer, jsonResponse, type MockServer } from '../helpers/mock-server';
 import { MiniMaxSDK } from '../../src/sdk';
+import { MusicSDK } from '../../src/sdk/music';
+import { existsSync, unlinkSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import type { MusicResponse } from '../../src/types/api';
+
+function makeMusicResponse(hexAudio?: string): MusicResponse {
+  return {
+    base_resp: { status_code: 0, status_msg: 'ok' },
+    data: {
+      audio: hexAudio || Buffer.from('hello music audio').toString('hex'),
+      status: 0,
+    },
+  };
+}
 
 describe('MiniMaxSDK.music', () => {
   let server: MockServer;
@@ -32,3 +47,40 @@ describe('MiniMaxSDK.music', () => {
     expect(result.data.audio_url).toBe('https://example.com/music.mp3');
   });
 });
+
+describe('MusicSDK.save', () => {
+  const sdk = new MusicSDK({ apiKey: 'sk-test', region: 'global' });
+
+  it('decodes hex audio and saves to disk', () => {
+    const out = join(tmpdir(), `music-sdk-save-${Date.now()}.mp3`);
+    const response = makeMusicResponse();
+
+    const saved = sdk.save(response, out);
+    expect(saved).toBe(out);
+    expect(existsSync(out)).toBe(true);
+    expect(readFileSync(out).toString()).toBe('hello music audio');
+    unlinkSync(out);
+  });
+
+  it('generates default filename with timestamp', () => {
+    const response = makeMusicResponse();
+    const saved = sdk.save(response);
+    expect(saved).toMatch(/music_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.mp3/);
+    expect(existsSync(saved)).toBe(true);
+    unlinkSync(saved);
+  });
+
+  it('creates intermediate directories', () => {
+    const out = join(tmpdir(), `music-sdk-deep-${Date.now()}`, 'x', 'y', 'song.wav');
+    const response = makeMusicResponse();
+    const saved = sdk.save(response, out, 'wav');
+    expect(existsSync(saved)).toBe(true);
+    unlinkSync(saved);
+  });
+
+  it('throws when audio data is missing', () => {
+    const response = makeMusicResponse('');
+    response.data.audio = undefined;
+    expect(() => sdk.save(response, '/tmp/test.mp3')).toThrow('missing audio data');
+  });
+});
diff --git a/test/sdk/speech.test.ts b/test/sdk/speech.test.ts
index 7a29309..772ae7e 100644
--- a/test/sdk/speech.test.ts
+++ b/test/sdk/speech.test.ts
@@ -1,6 +1,21 @@
 import { describe, it, expect, afterEach } from 'bun:test';
 import { createMockServer, jsonResponse, type MockServer } from '../helpers/mock-server';
 import { MiniMaxSDK } from '../../src/sdk';
+import { SpeechSDK } from '../../src/sdk/speech';
+import { existsSync, unlinkSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import type { SpeechResponse } from '../../src/types/api';
+
+function makeSpeechResponse(hexAudio?: string): SpeechResponse {
+  return {
+    base_resp: { status_code: 0, status_msg: 'ok' },
+    data: {
+      audio: hexAudio || Buffer.from('hello speech audio').toString('hex'),
+      status: 0,
+    },
+  };
+}
 
 describe('MiniMaxSDK.speech', () => {
   let server: MockServer;
@@ -54,3 +69,40 @@ describe('MiniMaxSDK.speech', () => {
     expect(voices[0].voice_id).toBe('voice-1');
   });
 });
+
+describe('SpeechSDK.save', () => {
+  const sdk = new SpeechSDK({ apiKey: 'sk-test', region: 'global' });
+
+  it('decodes hex audio and saves to disk', () => {
+    const out = join(tmpdir(), `speech-sdk-save-${Date.now()}.mp3`);
+    const response = makeSpeechResponse();
+
+    const saved = sdk.save(response, out);
+    expect(saved).toBe(out);
+    expect(existsSync(out)).toBe(true);
+    expect(readFileSync(out).toString()).toBe('hello speech audio');
+    unlinkSync(out);
+  });
+
+  it('generates default filename with timestamp', () => {
+    const response = makeSpeechResponse();
+    const saved = sdk.save(response);
+    expect(saved).toMatch(/speech_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.mp3/);
+    expect(existsSync(saved)).toBe(true);
+    unlinkSync(saved);
+  });
+
+  it('creates intermediate directories', () => {
+    const out = join(tmpdir(), `speech-sdk-deep-${Date.now()}`, 'a', 'b', 'out.wav');
+    const response = makeSpeechResponse();
+    const saved = sdk.save(response, out, 'wav');
+    expect(existsSync(saved)).toBe(true);
+    unlinkSync(saved);
+  });
+
+  it('throws when audio data is missing', () => {
+    const response = makeSpeechResponse('');
+    response.data.audio = undefined;
+    expect(() => sdk.save(response, '/tmp/test.mp3')).toThrow('missing audio data');
+  });
+});