Skip to content

Commit 129e89b

Browse files
authored
Merge pull request #31 from richardr1126/export-audiobook
Export audiobook + PDF extraction margins
2 parents 61953a7 + b67aea6 commit 129e89b

File tree

12 files changed

+1387
-175
lines changed

12 files changed

+1387
-175
lines changed

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Use Node.js slim image
22
FROM node:slim
33

4+
# Add ffmpeg
5+
RUN apt-get update && apt-get install -y ffmpeg
6+
47
# Create app directory
58
WORKDIR /app
69

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ services:
103103

104104
3. Configure the environment:
105105
```bash
106-
cp .env.template .env
106+
cp template.env .env
107107
# Edit .env with your configuration settings
108108
```
109109
> Note: The base URL for the TTS API should be accessible and relative to the Next.js server

src/app/api/audio/convert/route.ts

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
import { NextRequest, NextResponse } from 'next/server';
2+
import { spawn } from 'child_process';
3+
import { writeFile, mkdir, unlink, rmdir } from 'fs/promises';
4+
import { createReadStream } from 'fs';
5+
import { existsSync } from 'fs';
6+
import { join } from 'path';
7+
import { randomUUID } from 'crypto';
8+
9+
/**
 * One chapter of the audiobook as submitted by the client.
 */
interface Chapter {
  /** Chapter title; written into the chapter metadata of the output file. */
  title: string;
  /**
   * Encoded audio for the chapter as a plain array of byte values. The handler
   * turns it into a Buffer and writes it to an `.mp3` input file.
   */
  buffer: number[];
}
13+
14+
/**
 * JSON body expected by the conversion endpoint.
 */
interface ConversionRequest {
  /** Chapters in playback order; they are concatenated in this order. */
  chapters: Chapter[];
}
17+
18+
/**
 * Reads the duration of an audio file by running `ffprobe` on it.
 *
 * @param filePath - Path of the audio file to inspect.
 * @returns The container duration in seconds, as printed by ffprobe.
 * @throws If ffprobe cannot be spawned, exits with a non-zero code, or prints
 *   something that is not a finite number (for example an empty string or `N/A`).
 */
async function getAudioDuration(filePath: string): Promise<number> {
  return new Promise<number>((resolve, reject) => {
    const ffprobe = spawn('ffprobe', [
      '-i', filePath,
      '-show_entries', 'format=duration',
      '-v', 'quiet',
      '-of', 'csv=p=0'
    ]);

    // ffprobe prints just the duration value on stdout with these options.
    let output = '';
    ffprobe.stdout.on('data', (data) => {
      output += data.toString();
    });

    ffprobe.on('close', (code) => {
      if (code !== 0) {
        reject(new Error(`ffprobe process exited with code ${code}`));
        return;
      }

      const raw = output.trim();
      const duration = parseFloat(raw);

      // A NaN duration would silently poison every later chapter timestamp,
      // so fail here where the cause is still visible.
      if (!Number.isFinite(duration)) {
        reject(new Error(`ffprobe returned an invalid duration for ${filePath}: "${raw}"`));
        return;
      }

      resolve(duration);
    });

    ffprobe.on('error', (err) => {
      reject(err);
    });
  });
}
46+
47+
/**
 * Runs `ffmpeg` with the given arguments and waits for it to finish.
 *
 * Everything ffmpeg writes to stderr is forwarded to `console.error`.
 *
 * @param args - Command-line arguments passed to ffmpeg as-is.
 * @throws If the process cannot be spawned or exits with a non-zero code.
 */
async function runFFmpeg(args: string[]): Promise<void> {
  const child = spawn('ffmpeg', args);

  child.stderr.on('data', (chunk) => {
    console.error(`ffmpeg stderr: ${chunk}`);
  });

  await new Promise<void>((resolve, reject) => {
    child.on('error', (err) => reject(err));

    child.on('close', (exitCode) => {
      if (exitCode !== 0) {
        reject(new Error(`FFmpeg process exited with code ${exitCode}`));
        return;
      }
      resolve();
    });
  });
}
68+
69+
/**
 * Best-effort removal of temporary files and directories.
 *
 * Files are removed first and directories only afterwards: `rmdir` requires an
 * empty directory, so removing both concurrently fails with ENOTEMPTY whenever
 * a listed file lives inside a listed directory.
 *
 * Individual failures are logged with `console.error` and never reject.
 *
 * @param files - Paths of files to delete.
 * @param directories - Paths of (by then empty) directories to remove.
 */
async function cleanup(files: string[], directories: string[]): Promise<void> {
  await Promise.all(files.map(f => unlink(f).catch(console.error)));
  await Promise.all(directories.map(d => rmdir(d).catch(console.error)));
}
75+
76+
/**
 * Escapes a value for an FFMETADATA file: `=`, `;`, `#`, `\` and newline
 * are each prefixed with a backslash.
 */
function escapeMetadataValue(value: string): string {
  return value.replace(/[=;#\\\n]/g, (ch) => `\\${ch}`);
}

/**
 * Wraps a path in single quotes for an ffmpeg concat list file, closing and
 * reopening the quotes around any embedded single quote.
 */
function quoteConcatPath(path: string): string {
  return `'${path.replace(/'/g, "'\\''")}'`;
}

/** Checks that a parsed JSON body has a non-empty, well-shaped `chapters` array. */
function isConversionRequest(value: unknown): value is ConversionRequest {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  const chapters = (value as { chapters?: unknown }).chapters;
  if (!Array.isArray(chapters) || chapters.length === 0) {
    return false;
  }

  return chapters.every(
    (chapter: unknown) =>
      typeof chapter === 'object' &&
      chapter !== null &&
      typeof (chapter as { title?: unknown }).title === 'string' &&
      Array.isArray((chapter as { buffer?: unknown }).buffer)
  );
}

/**
 * Converts a list of chapter audio buffers into a single M4B file with
 * chapter markers and streams the result back.
 *
 * Each chapter is written to disk, converted to a uniform WAV with ffmpeg and
 * measured with ffprobe; the WAVs are then concatenated and encoded to AAC
 * together with a generated chapter metadata file.
 *
 * @param request - Request whose JSON body matches `ConversionRequest`.
 * @returns A streamed `audio/mp4` response on success, a 400 JSON error when
 *   the body has no usable `chapters` array, or a 500 JSON error when the
 *   conversion fails.
 */
export async function POST(request: NextRequest) {
  const tempFiles: string[] = [];
  const tempDirs: string[] = [];

  try {
    // request.json() reads and parses the whole body before returning.
    const data: unknown = await request.json();

    if (!isConversionRequest(data)) {
      return NextResponse.json(
        { error: 'Request body must contain a non-empty "chapters" array' },
        { status: 400 }
      );
    }

    // Create the shared temp directory if needed. `recursive: true` makes the
    // call a no-op when a concurrent request created it in the meantime.
    const tempDir = join(process.cwd(), 'temp');
    if (!existsSync(tempDir)) {
      await mkdir(tempDir, { recursive: true });
    }

    // Generate unique filenames
    const id = randomUUID();
    const outputPath = join(tempDir, `${id}.m4b`);
    const metadataPath = join(tempDir, `${id}.txt`);
    const intermediateDir = join(tempDir, `${id}-intermediate`);

    tempFiles.push(outputPath, metadataPath);
    tempDirs.push(intermediateDir);

    // The directory name contains a fresh UUID, so it cannot already exist.
    await mkdir(intermediateDir);

    // Process chapters sequentially so only one chapter is converted at a time
    const chapterFiles: { path: string; title: string; duration: number }[] = [];

    for (const [chapterIndex, chapter] of data.chapters.entries()) {
      const inputPath = join(intermediateDir, `${chapterIndex}-input.mp3`);
      const wavPath = join(intermediateDir, `${chapterIndex}.wav`);

      tempFiles.push(inputPath, wavPath);

      // Write the chapter audio to a temp file
      await writeFile(inputPath, Buffer.from(chapter.buffer));

      // Convert to WAV with consistent format
      await runFFmpeg([
        '-i', inputPath,
        '-acodec', 'pcm_s16le',
        '-ar', '44100',
        '-ac', '2',
        wavPath
      ]);

      const duration = await getAudioDuration(wavPath);

      chapterFiles.push({
        path: wavPath,
        title: chapter.title,
        duration
      });

      // Clean up input file early and stop tracking it for the final cleanup
      await unlink(inputPath).catch(console.error);
      const trackedIndex = tempFiles.indexOf(inputPath);
      if (trackedIndex > -1) {
        tempFiles.splice(trackedIndex, 1);
      }
    }

    // Create chapter metadata file; each chapter starts where the previous ended
    const metadata: string[] = [];
    let currentTime = 0;

    for (const chapter of chapterFiles) {
      const startMs = Math.floor(currentTime * 1000);
      currentTime += chapter.duration;
      const endMs = Math.floor(currentTime * 1000);

      metadata.push(
        `[CHAPTER]`,
        `TIMEBASE=1/1000`,
        `START=${startMs}`,
        `END=${endMs}`,
        `title=${escapeMetadataValue(chapter.title)}`
      );
    }

    await writeFile(metadataPath, ';FFMETADATA1\n' + metadata.join('\n'));

    // Create list file for concat
    const listPath = join(tempDir, `${id}-list.txt`);
    tempFiles.push(listPath);

    await writeFile(
      listPath,
      chapterFiles.map(f => `file ${quoteConcatPath(f.path)}`).join('\n')
    );

    // Combine all files into a single M4B
    await runFFmpeg([
      '-f', 'concat',
      '-safe', '0',
      '-i', listPath,
      '-i', metadataPath,
      '-map_metadata', '1',
      '-c:a', 'aac',
      '-b:a', '192k',
      '-movflags', '+faststart',
      outputPath
    ]);

    // Create a readable stream from the output file
    const fileStream = createReadStream(outputPath);

    // Create a web-compatible ReadableStream from the Node.js stream
    const webStream = new ReadableStream({
      start(controller) {
        fileStream.on('data', (chunk) => {
          controller.enqueue(chunk);
        });

        fileStream.on('end', () => {
          controller.close();
          // The file has been fully read into the stream, so it can be removed
          cleanup(tempFiles, tempDirs).catch(console.error);
        });

        fileStream.on('error', (error) => {
          console.error('Stream error:', error);
          controller.error(error);
          cleanup(tempFiles, tempDirs).catch(console.error);
        });
      },
      cancel() {
        fileStream.destroy();
        cleanup(tempFiles, tempDirs).catch(console.error);
      }
    });

    // Return the streaming response
    return new NextResponse(webStream, {
      headers: {
        'Content-Type': 'audio/mp4',
        'Transfer-Encoding': 'chunked'
      },
    });

  } catch (error) {
    // Clean up in case of error
    await cleanup(tempFiles, tempDirs).catch(console.error);

    console.error('Error converting audio:', error);
    return NextResponse.json(
      { error: 'Failed to convert audio format' },
      { status: 500 }
    );
  }
}

src/app/api/tts/route.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ export async function POST(req: NextRequest) {
2020
// Initialize OpenAI client with abort signal
2121
const openai = new OpenAI({
2222
apiKey: openApiKey,
23-
baseURL: openApiBaseUrl || 'https://api.openai.com/v1',
23+
baseURL: openApiBaseUrl,
2424
});
2525

2626
// Request audio from OpenAI and pass along the abort signal
@@ -29,19 +29,23 @@ export async function POST(req: NextRequest) {
2929
voice: voice as "alloy",
3030
input: text,
3131
speed: speed,
32-
}, { signal: req.signal }); // Pass the abort signal to OpenAI client
32+
response_format: 'mp3', // Always use mp3 since we convert to WAV later if needed
33+
}, { signal: req.signal });
3334

3435
// Get the audio data as array buffer
35-
// This will also be aborted if the client cancels
36-
const arrayBuffer = await response.arrayBuffer();
36+
const stream = response.body;
3737

3838
// Return audio data with appropriate headers
39-
return new NextResponse(arrayBuffer);
39+
return new NextResponse(stream, {
40+
headers: {
41+
'Content-Type': 'audio/mpeg'
42+
}
43+
});
4044
} catch (error) {
4145
// Check if this was an abort error
4246
if (error instanceof Error && error.name === 'AbortError') {
4347
console.log('TTS request aborted by client');
44-
return new Response(null, { status: 499 }); // Use 499 status for client closed request
48+
return new NextResponse(null, { status: 499 }); // Use 499 status for client closed request
4549
}
4650

4751
console.error('Error generating TTS:', error);

0 commit comments

Comments
 (0)