Skip to content

Commit 79d2e7f

Browse files
committed
Working m4b
1 parent 9357ddd commit 79d2e7f

File tree

6 files changed

+422
-88
lines changed

6 files changed

+422
-88
lines changed

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Use Node.js slim image
22
FROM node:slim
33

4+
# Add ffmpeg. Recommended-but-optional packages are skipped and the apt
# package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*
6+
47
# Create app directory
58
WORKDIR /app
69

src/app/api/audio/convert/route.ts

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
import { NextRequest, NextResponse } from 'next/server';
2+
import { spawn } from 'child_process';
3+
import { writeFile, readFile, mkdir, unlink, rmdir } from 'fs/promises';
4+
import { existsSync } from 'fs';
5+
import { join } from 'path';
6+
import { randomUUID } from 'crypto';
7+
8+
/**
 * One chapter of the audiobook as it arrives in the request body.
 */
interface Chapter {
  /** Chapter name, written into the chapter metadata of the output file. */
  title: string;

  /**
   * Encoded audio for the chapter, one array element per byte.
   * The handler copies it into a `Uint8Array` before writing it to disk.
   */
  buffer: number[];
}
12+
13+
/**
 * JSON body accepted by the conversion endpoint.
 */
interface ConversionRequest {
  /** Chapters in playback order; they are concatenated in this order. */
  chapters: Chapter[];
}
16+
17+
/**
 * Measures the duration of an audio file by running `ffprobe` on it.
 *
 * ffprobe is asked to print only the container-level `duration` entry in
 * bare CSV form, so its stdout is expected to be a single decimal number.
 *
 * @param filePath - Path of the audio file to probe.
 * @returns The duration in seconds.
 * @throws Rejects when ffprobe cannot be spawned, exits with a non-zero code,
 *   or prints something that is not a finite, non-negative number (for
 *   example an empty string or `N/A`).
 */
async function getAudioDuration(filePath: string): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const ffprobe = spawn('ffprobe', [
      '-i', filePath,
      '-show_entries', 'format=duration',
      '-v', 'quiet',
      '-of', 'csv=p=0',
    ]);

    let output = '';
    ffprobe.stdout.on('data', (data) => {
      output += data.toString();
    });

    ffprobe.on('close', (code) => {
      if (code !== 0) {
        reject(new Error(`ffprobe process exited with code ${code}`));
        return;
      }

      const raw = output.trim();
      const duration = Number.parseFloat(raw);

      // A NaN duration would silently poison every subsequent chapter
      // timestamp, so treat unparsable output as a hard failure.
      if (!Number.isFinite(duration) || duration < 0) {
        reject(
          new Error(`ffprobe returned an invalid duration for ${filePath}: "${raw}"`)
        );
        return;
      }

      resolve(duration);
    });

    // Emitted when the process could not be spawned at all (e.g. ffprobe missing).
    ffprobe.on('error', reject);
  });
}
45+
46+
/**
 * Runs `ffmpeg` with the given arguments and waits for it to finish.
 *
 * Everything ffmpeg writes to stderr is forwarded to `console.error`, and the
 * most recent part of it is appended to the rejection message so a failure
 * can be diagnosed from the error alone.
 *
 * @param args - Command-line arguments passed to ffmpeg verbatim.
 * @returns Resolves once ffmpeg exits with code 0.
 * @throws Rejects when ffmpeg cannot be spawned or exits with a non-zero code.
 */
async function runFFmpeg(args: string[]): Promise<void> {
  // Upper bound on how much stderr is retained for the error message.
  const maxStderrChars = 2000;

  return new Promise<void>((resolve, reject) => {
    // stdin is detached so ffmpeg can never block waiting for interactive
    // input (e.g. an overwrite prompt); stdout is unused and therefore
    // ignored rather than left as an undrained pipe.
    const ffmpeg = spawn('ffmpeg', args, { stdio: ['ignore', 'ignore', 'pipe'] });

    let stderrTail = '';
    ffmpeg.stderr.on('data', (data) => {
      console.error(`ffmpeg stderr: ${data}`);
      stderrTail = (stderrTail + data.toString()).slice(-maxStderrChars);
    });

    ffmpeg.on('close', (code) => {
      if (code === 0) {
        resolve();
        return;
      }

      const detail = stderrTail.trim();
      reject(
        new Error(
          `FFmpeg process exited with code ${code}` + (detail ? `: ${detail}` : '')
        )
      );
    });

    // Emitted when the process could not be spawned at all (e.g. ffmpeg missing).
    ffmpeg.on('error', reject);
  });
}
67+
68+
/**
 * Runtime check that a parsed JSON body has the shape of a ConversionRequest:
 * a non-empty `chapters` array whose items carry a string `title` and an
 * array `buffer`.
 */
function isConversionRequest(value: unknown): value is ConversionRequest {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  const chapters = (value as { chapters?: unknown }).chapters;
  if (!Array.isArray(chapters) || chapters.length === 0) {
    return false;
  }

  return chapters.every((chapter: unknown) => {
    if (typeof chapter !== 'object' || chapter === null) {
      return false;
    }
    const candidate = chapter as { title?: unknown; buffer?: unknown };
    return typeof candidate.title === 'string' && Array.isArray(candidate.buffer);
  });
}

/**
 * Escapes a value for an FFMETADATA file, where `=`, `;`, `#`, `\` and
 * newline are special and must be preceded by a backslash.
 */
function escapeMetadataValue(value: string): string {
  // Normalise CR/CRLF first so only "\n" line breaks remain to be escaped.
  return value.replace(/\r\n?/g, '\n').replace(/[=;#\\\n]/g, (ch) => `\\${ch}`);
}

/**
 * Escapes a path for use inside single quotes in an ffmpeg concat list file:
 * a quote is written by closing the quoted run, adding `\'`, and reopening it.
 */
function escapeConcatPath(path: string): string {
  return path.replace(/'/g, "'\\''");
}

/** True for the "no such file or directory" error raised by fs operations. */
function isMissingFileError(error: unknown): boolean {
  return error instanceof Error && 'code' in error && error.code === 'ENOENT';
}

/**
 * Awaits a best-effort removal. A path that never existed is not an error
 * during cleanup; anything else is logged but never rethrown.
 */
async function removeQuietly(removal: Promise<void>): Promise<void> {
  try {
    await removal;
  } catch (error) {
    if (!isMissingFileError(error)) {
      console.error(error);
    }
  }
}

/**
 * Converts the posted chapters into a single M4B audiobook with chapter marks.
 *
 * Steps: each chapter's bytes are written to disk and transcoded to a WAV
 * file with a uniform format, its duration is measured, a chapter metadata
 * file and a concat list are generated, and ffmpeg joins everything into one
 * AAC-encoded `.m4b` file whose bytes are returned.
 *
 * All scratch files live under `<cwd>/temp` and are removed whether the
 * conversion succeeds or fails.
 *
 * @param request - Request whose JSON body matches ConversionRequest.
 * @returns The M4B bytes with `Content-Type: audio/mp4`; a 400 JSON error for
 *   a malformed body; a 500 JSON error when conversion fails.
 */
export async function POST(request: NextRequest) {
  // Parse and validate the request body before touching the filesystem.
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return NextResponse.json(
      { error: 'Request body must be valid JSON' },
      { status: 400 }
    );
  }

  if (!isConversionRequest(payload)) {
    return NextResponse.json(
      { error: 'Request must contain a non-empty "chapters" array of { title, buffer }' },
      { status: 400 }
    );
  }

  // Generate unique paths for this request.
  const tempDir = join(process.cwd(), 'temp');
  const id = randomUUID();
  const outputPath = join(tempDir, `${id}.m4b`);
  const metadataPath = join(tempDir, `${id}.txt`);
  const listPath = join(tempDir, `${id}-list.txt`);
  const intermediateDir = join(tempDir, `${id}-intermediate`);

  // Every file this request may create, so the cleanup below can remove
  // whatever exists regardless of where a failure happened.
  const scratchFiles = new Set<string>([outputPath, metadataPath, listPath]);

  try {
    // `recursive: true` makes mkdir a no-op when the directory already
    // exists, so two concurrent first requests cannot fail with EEXIST.
    if (!existsSync(tempDir)) {
      await mkdir(tempDir, { recursive: true });
    }
    await mkdir(intermediateDir, { recursive: true });

    // Write each chapter to disk, normalise it to WAV and measure it.
    const chapterFiles: { path: string; title: string; duration: number }[] = [];

    for (const [index, chapter] of payload.chapters.entries()) {
      const inputPath = join(intermediateDir, `${index}-input.aac`);
      const wavPath = join(intermediateDir, `${index}.wav`);
      scratchFiles.add(inputPath);
      scratchFiles.add(wavPath);

      await writeFile(inputPath, Buffer.from(new Uint8Array(chapter.buffer)));

      // Convert to WAV with consistent format (this helps with timestamp issues)
      await runFFmpeg([
        '-i', inputPath,
        '-acodec', 'pcm_s16le',
        '-ar', '44100',
        '-ac', '2',
        wavPath,
      ]);

      const duration = await getAudioDuration(wavPath);
      chapterFiles.push({ path: wavPath, title: chapter.title, duration });

      // The raw input is no longer needed; drop it early to limit disk usage.
      await unlink(inputPath).catch(console.error);
      scratchFiles.delete(inputPath);
    }

    // Build the chapter metadata from the measured durations.
    const metadataLines = [
      ';FFMETADATA1',
      'title=Kokoro Audiobook',
      'artist=KokoroTTS',
    ];

    let elapsedSeconds = 0;
    for (const chapter of chapterFiles) {
      const startMs = Math.floor(elapsedSeconds * 1000);
      elapsedSeconds += chapter.duration;
      const endMs = Math.floor(elapsedSeconds * 1000);

      metadataLines.push(
        '[CHAPTER]',
        'TIMEBASE=1/1000',
        `START=${startMs}`,
        `END=${endMs}`,
        // Titles are client-supplied; escape them so they cannot break or
        // inject metadata entries.
        `title=${escapeMetadataValue(chapter.title)}`
      );
    }

    await writeFile(metadataPath, metadataLines.join('\n'));

    // Create list file for concat
    await writeFile(
      listPath,
      chapterFiles.map((f) => `file '${escapeConcatPath(f.path)}'`).join('\n')
    );

    // Combine all files into a single M4B
    await runFFmpeg([
      '-f', 'concat',
      '-safe', '0',
      '-i', listPath,
      '-i', metadataPath,
      '-map_metadata', '1',
      '-c:a', 'aac',
      '-b:a', '192k',
      '-movflags', '+faststart',
      outputPath,
    ]);

    const m4bData = await readFile(outputPath);

    return new NextResponse(m4bData, {
      headers: {
        'Content-Type': 'audio/mp4',
      },
    });
  } catch (error) {
    console.error('Error converting audio:', error);
    return NextResponse.json(
      { error: 'Failed to convert audio format' },
      { status: 500 }
    );
  } finally {
    // Remove files first and the directory afterwards: rmdir only succeeds
    // on an empty directory, so it must not race with the unlinks.
    await Promise.all([...scratchFiles].map((file) => removeQuietly(unlink(file))));
    await removeQuietly(rmdir(intermediateDir));
  }
}

src/app/api/tts/route.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ export async function POST(req: NextRequest) {
66
// Get API credentials from headers or fall back to environment variables
77
const openApiKey = req.headers.get('x-openai-key') || process.env.API_KEY || 'none';
88
const openApiBaseUrl = req.headers.get('x-openai-base-url') || process.env.API_BASE;
9-
const { text, voice, speed } = await req.json();
10-
console.log('Received TTS request:', text, voice, speed);
9+
const { text, voice, speed, format } = await req.json();
10+
console.log('Received TTS request:', text, voice, speed, format);
1111

1212
if (!openApiKey) {
1313
return NextResponse.json({ error: 'Missing OpenAI API key' }, { status: 401 });
@@ -29,14 +29,20 @@ export async function POST(req: NextRequest) {
2929
voice: voice as "alloy",
3030
input: text,
3131
speed: speed,
32+
response_format: format === 'aac' ? 'aac' : 'mp3',
3233
}, { signal: req.signal }); // Pass the abort signal to OpenAI client
3334

3435
// Get the audio data as array buffer
3536
// This will also be aborted if the client cancels
3637
const stream = response.body;
3738

3839
// Return audio data with appropriate headers
39-
return new NextResponse(stream);
40+
const contentType = format === 'aac' ? 'audio/aac' : 'audio/mpeg';
41+
return new NextResponse(stream, {
42+
headers: {
43+
'Content-Type': contentType
44+
}
45+
});
4046
} catch (error) {
4147
// Check if this was an abort error
4248
if (error instanceof Error && error.name === 'AbortError') {

0 commit comments

Comments
 (0)