Skip to content

Commit b67aea6

Browse files
committed
Refactor audio conversion API to improve memory management and add streaming response
1 parent 4a2d75e commit b67aea6

File tree

6 files changed

+421
-135
lines changed

6 files changed

+421
-135
lines changed

src/app/api/audio/convert/route.ts

Lines changed: 83 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { NextRequest, NextResponse } from 'next/server';
22
import { spawn } from 'child_process';
3-
import { writeFile, readFile, mkdir, unlink, rmdir } from 'fs/promises';
3+
import { writeFile, mkdir, unlink, rmdir } from 'fs/promises';
4+
import { createReadStream } from 'fs';
45
import { existsSync } from 'fs';
56
import { join } from 'path';
67
import { randomUUID } from 'crypto';
@@ -65,9 +66,19 @@ async function runFFmpeg(args: string[]): Promise<void> {
6566
});
6667
}
6768

69+
/**
 * Best-effort removal of temporary files and directories.
 *
 * Files are unlinked first and directories are removed only after every
 * unlink has settled: `rmdir` fails with ENOTEMPTY on a directory that still
 * contains entries, so running both steps concurrently can leave the
 * directory behind.
 *
 * Never rejects. Failures are logged with `console.error`, except ENOENT,
 * which only means the path was already removed (this helper may be invoked
 * more than once for the same paths).
 *
 * @param files - Paths of files to delete.
 * @param directories - Paths of directories to remove once the files are gone.
 */
async function cleanup(files: string[], directories: string[]): Promise<void> {
  // A path that no longer exists is the desired end state, not an error.
  const logUnlessMissing = (error: unknown): void => {
    const code = (error as { code?: unknown } | null)?.code;
    if (code !== 'ENOENT') {
      console.error(error);
    }
  };

  await Promise.all(files.map(f => unlink(f).catch(logUnlessMissing)));
  await Promise.all(directories.map(d => rmdir(d).catch(logUnlessMissing)));
}
75+
6876
export async function POST(request: NextRequest) {
77+
const tempFiles: string[] = [];
78+
const tempDirs: string[] = [];
79+
6980
try {
70-
// Parse the request body
81+
// Parse the JSON request body (request.json() buffers the entire payload in memory)
7182
const data: ConversionRequest = await request.json();
7283

7384
// Create temp directory if it doesn't exist
@@ -82,40 +93,65 @@ export async function POST(request: NextRequest) {
8293
const metadataPath = join(tempDir, `${id}.txt`);
8394
const intermediateDir = join(tempDir, `${id}-intermediate`);
8495

96+
tempFiles.push(outputPath, metadataPath);
97+
tempDirs.push(intermediateDir);
98+
8599
// Create intermediate directory
86100
if (!existsSync(intermediateDir)) {
87101
await mkdir(intermediateDir);
88102
}
89103

90-
// Process each chapter - no need for initial conversion since input is WAV
104+
// Process chapters sequentially to avoid memory issues
91105
const chapterFiles: { path: string; title: string; duration: number }[] = [];
92106
let currentTime = 0;
93107

94108
for (let i = 0; i < data.chapters.length; i++) {
95109
const chapter = data.chapters[i];
110+
const inputPath = join(intermediateDir, `${i}-input.mp3`);
96111
const outputPath = join(intermediateDir, `${i}.wav`);
97112

98-
// Write the chapter audio directly since it's already WAV
99-
await writeFile(outputPath, Buffer.from(new Uint8Array(chapter.buffer)));
113+
tempFiles.push(inputPath, outputPath);
114+
115+
// Write the chapter audio to a temp file using a Buffer chunk size of 64KB
116+
const chunkSize = 64 * 1024; // 64KB chunks
117+
const buffer = Buffer.from(new Uint8Array(chapter.buffer));
118+
const chunks: Buffer[] = [];
119+
120+
for (let offset = 0; offset < buffer.length; offset += chunkSize) {
121+
chunks.push(buffer.slice(offset, offset + chunkSize));
122+
}
123+
124+
await writeFile(inputPath, Buffer.concat(chunks));
125+
chunks.length = 0; // Clear chunks array
126+
127+
// Convert to WAV with consistent format
128+
await runFFmpeg([
129+
'-i', inputPath,
130+
'-acodec', 'pcm_s16le',
131+
'-ar', '44100',
132+
'-ac', '2',
133+
outputPath
134+
]);
100135

101-
// Get the duration of this chapter
102136
const duration = await getAudioDuration(outputPath);
103137

104138
chapterFiles.push({
105139
path: outputPath,
106140
title: chapter.title,
107141
duration
108142
});
143+
144+
// Clean up input file early
145+
await unlink(inputPath).catch(console.error);
146+
const index = tempFiles.indexOf(inputPath);
147+
if (index > -1) {
148+
tempFiles.splice(index, 1);
149+
}
109150
}
110151

111152
// Create chapter metadata file
112153
const metadata: string[] = [];
113-
metadata.push(
114-
`title=Kokoro Audiobook`,
115-
`artist=KokoroTTS`,
116-
);
117154

118-
// Calculate chapter timings based on actual durations
119155
chapterFiles.forEach((chapter) => {
120156
const startMs = Math.floor(currentTime * 1000);
121157
currentTime += chapter.duration;
@@ -134,6 +170,8 @@ export async function POST(request: NextRequest) {
134170

135171
// Create list file for concat
136172
const listPath = join(tempDir, `${id}-list.txt`);
173+
tempFiles.push(listPath);
174+
137175
await writeFile(
138176
listPath,
139177
chapterFiles.map(f => `file '${f.path}'`).join('\n')
@@ -152,24 +190,46 @@ export async function POST(request: NextRequest) {
152190
outputPath
153191
]);
154192

155-
// Read the converted file
156-
const m4bData = await readFile(outputPath);
157-
158-
// Clean up temp files
159-
await Promise.all([
160-
...chapterFiles.map(f => unlink(f.path)),
161-
unlink(metadataPath),
162-
unlink(listPath),
163-
unlink(outputPath),
164-
rmdir(intermediateDir)
165-
].map(p => p.catch(console.error)));
193+
// Create a readable stream from the output file
194+
const fileStream = createReadStream(outputPath);
195+
196+
// Create a web-compatible ReadableStream from the Node.js stream
197+
const webStream = new ReadableStream({
198+
start(controller) {
199+
fileStream.on('data', (chunk) => {
200+
controller.enqueue(chunk);
201+
});
202+
203+
fileStream.on('end', () => {
204+
controller.close();
205+
// Clean up only after the stream has been fully sent
206+
cleanup(tempFiles, tempDirs).catch(console.error);
207+
});
208+
209+
fileStream.on('error', (error) => {
210+
console.error('Stream error:', error);
211+
controller.error(error);
212+
cleanup(tempFiles, tempDirs).catch(console.error);
213+
});
214+
},
215+
cancel() {
216+
fileStream.destroy();
217+
cleanup(tempFiles, tempDirs).catch(console.error);
218+
}
219+
});
166220

167-
return new NextResponse(m4bData, {
221+
// Return the streaming response
222+
return new NextResponse(webStream, {
168223
headers: {
169224
'Content-Type': 'audio/mp4',
225+
'Transfer-Encoding': 'chunked'
170226
},
171227
});
228+
172229
} catch (error) {
230+
// Clean up in case of error
231+
await cleanup(tempFiles, tempDirs).catch(console.error);
232+
173233
console.error('Error converting audio:', error);
174234
return NextResponse.json(
175235
{ error: 'Failed to convert audio format' },

src/app/api/tts/route.ts

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ export async function POST(req: NextRequest) {
66
// Get API credentials from headers or fall back to environment variables
77
const openApiKey = req.headers.get('x-openai-key') || process.env.API_KEY || 'none';
88
const openApiBaseUrl = req.headers.get('x-openai-base-url') || process.env.API_BASE;
9-
const { text, voice, speed, format } = await req.json();
10-
console.log('Received TTS request:', text, voice, speed, format);
9+
const { text, voice, speed } = await req.json();
10+
console.log('Received TTS request:', text, voice, speed);
1111

1212
if (!openApiKey) {
1313
return NextResponse.json({ error: 'Missing OpenAI API key' }, { status: 401 });
@@ -29,19 +29,16 @@ export async function POST(req: NextRequest) {
2929
voice: voice as "alloy",
3030
input: text,
3131
speed: speed,
32-
// Use wav format for audiobook generation to avoid initial conversion
33-
response_format: format === 'audiobook' ? 'wav' : (format === 'aac' ? 'aac' : 'mp3'),
34-
}, { signal: req.signal }); // Pass the abort signal to OpenAI client
32+
response_format: 'mp3', // Always use mp3 since we convert to WAV later if needed
33+
}, { signal: req.signal });
3534

3635
// Get the audio data as a stream (response.body) so it can be piped through to the client
37-
// This will also be aborted if the client cancels
3836
const stream = response.body;
3937

4038
// Return audio data with appropriate headers
41-
const contentType = format === 'audiobook' ? 'audio/wav' : (format === 'aac' ? 'audio/aac' : 'audio/mpeg');
4239
return new NextResponse(stream, {
4340
headers: {
44-
'Content-Type': contentType
41+
'Content-Type': 'audio/mpeg'
4542
}
4643
});
4744
} catch (error) {

src/components/DocumentSettings.tsx

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import { useConfig, ViewType } from '@/contexts/ConfigContext';
66
import { ChevronUpDownIcon, CheckIcon } from '@/components/icons/Icons';
77
import { useEPUB } from '@/contexts/EPUBContext';
88
import { usePDF } from '@/contexts/PDFContext';
9+
import { useTimeEstimation } from '@/hooks/useTimeEstimation';
10+
import { LoadingSpinner } from './Spinner';
911

1012
const isDev = process.env.NEXT_PUBLIC_NODE_ENV !== 'production' || process.env.NODE_ENV == null;
1113

@@ -37,9 +39,9 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
3739
rightMargin,
3840
updateConfigKey
3941
} = useConfig();
40-
const { createFullAudioBook } = useEPUB();
41-
const { createFullAudioBook: createPDFAudioBook } = usePDF();
42-
const [progress, setProgress] = useState(0);
42+
const { createFullAudioBook, isAudioCombining } = useEPUB();
43+
const { createFullAudioBook: createPDFAudioBook, isAudioCombining: isPDFAudioCombining } = usePDF();
44+
const { progress, setProgress, estimatedTimeRemaining } = useTimeEstimation();
4345
const [isGenerating, setIsGenerating] = useState(false);
4446
const [audioFormat, setAudioFormat] = useState<'mp3' | 'm4b'>('mp3');
4547
const [localMargins, setLocalMargins] = useState({
@@ -116,7 +118,7 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
116118
setProgress(0);
117119
abortControllerRef.current = null;
118120
}
119-
}, [createFullAudioBook, createPDFAudioBook, epub, audioFormat]);
121+
}, [createFullAudioBook, createPDFAudioBook, epub, audioFormat, setProgress]);
120122

121123
const handleCancel = () => {
122124
if (abortControllerRef.current) {
@@ -195,7 +197,10 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
195197
/>
196198
</div>
197199
<div className="flex justify-between items-center text-sm text-muted">
198-
<span>{Math.round(progress)}% complete</span>
200+
<span>
201+
{Math.round(progress)}% complete
202+
{estimatedTimeRemaining && ` • ${estimatedTimeRemaining} remaining`}
203+
</span>
199204
<Button
200205
type="button"
201206
className="inline-flex justify-center rounded-lg px-2.5 py-1 text-sm
@@ -204,7 +209,13 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
204209
transform transition-transform duration-200 ease-in-out hover:scale-[1.02]"
205210
onClick={handleCancel}
206211
>
207-
Cancel and download
212+
{(epub ? isAudioCombining : isPDFAudioCombining) ? (
213+
<div className="w-full h-full flex items-center justify-end">
214+
<LoadingSpinner />
215+
</div>
216+
) : (
217+
'Cancel and download'
218+
)}
208219
</Button>
209220
</div>
210221
</div>

0 commit comments

Comments
 (0)