Skip to content

Commit 4a2d75e

Browse files
committed
Export to m4b from WAV efficiently
1 parent 79d2e7f commit 4a2d75e

File tree

4 files changed

+10
-23
lines changed

4 files changed

+10
-23
lines changed

src/app/api/audio/convert/route.ts

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -87,26 +87,16 @@ export async function POST(request: NextRequest) {
8787
await mkdir(intermediateDir);
8888
}
8989

90-
// First, write each chapter to a temporary file and get its duration
90+
// Process each chapter - no need for initial conversion since input is WAV
9191
const chapterFiles: { path: string; title: string; duration: number }[] = [];
9292
let currentTime = 0;
9393

9494
for (let i = 0; i < data.chapters.length; i++) {
9595
const chapter = data.chapters[i];
96-
const inputPath = join(intermediateDir, `${i}-input.aac`);
9796
const outputPath = join(intermediateDir, `${i}.wav`);
9897

99-
// Write the chapter audio to a temp file
100-
await writeFile(inputPath, Buffer.from(new Uint8Array(chapter.buffer)));
101-
102-
// Convert to WAV with consistent format (this helps with timestamp issues)
103-
await runFFmpeg([
104-
'-i', inputPath,
105-
'-acodec', 'pcm_s16le',
106-
'-ar', '44100',
107-
'-ac', '2',
108-
outputPath
109-
]);
98+
// Write the chapter audio directly since it's already WAV
99+
await writeFile(outputPath, Buffer.from(new Uint8Array(chapter.buffer)));
110100

111101
// Get the duration of this chapter
112102
const duration = await getAudioDuration(outputPath);
@@ -116,9 +106,6 @@ export async function POST(request: NextRequest) {
116106
title: chapter.title,
117107
duration
118108
});
119-
120-
// Clean up input file
121-
await unlink(inputPath).catch(console.error);
122109
}
123110

124111
// Create chapter metadata file
@@ -129,7 +116,7 @@ export async function POST(request: NextRequest) {
129116
);
130117

131118
// Calculate chapter timings based on actual durations
132-
chapterFiles.forEach((chapter, index) => {
119+
chapterFiles.forEach((chapter) => {
133120
const startMs = Math.floor(currentTime * 1000);
134121
currentTime += chapter.duration;
135122
const endMs = Math.floor(currentTime * 1000);

src/app/api/tts/route.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,16 @@ export async function POST(req: NextRequest) {
2929
voice: voice as "alloy",
3030
input: text,
3131
speed: speed,
32-
response_format: format === 'aac' ? 'aac' : 'mp3',
32+
// Use wav format for audiobook generation to avoid initial conversion
33+
response_format: format === 'audiobook' ? 'wav' : (format === 'aac' ? 'aac' : 'mp3'),
3334
}, { signal: req.signal }); // Pass the abort signal to OpenAI client
3435

3536
// Get the audio data as a stream (the response body is passed through without buffering)
3637
// This will also be aborted if the client cancels
3738
const stream = response.body;
3839

3940
// Return audio data with appropriate headers
40-
const contentType = format === 'aac' ? 'audio/aac' : 'audio/mpeg';
41+
const contentType = format === 'audiobook' ? 'audio/wav' : (format === 'aac' ? 'audio/aac' : 'audio/mpeg');
4142
return new NextResponse(stream, {
4243
headers: {
4344
'Content-Type': contentType

src/contexts/EPUBContext.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
224224
text: text.trim(),
225225
voice: voice,
226226
speed: voiceSpeed,
227-
format: 'aac'
227+
format: 'audiobook' // Request WAV format directly
228228
}),
229229
signal
230230
});
@@ -244,7 +244,7 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
244244
let spineIndex = processedSections;
245245
let currentSpineHref: string | undefined;
246246

247-
spine.each((item: any) => {
247+
spine.each((item: SpineItem) => {
248248
if (spineIndex === 0) {
249249
currentSpineHref = item.href;
250250
}

src/contexts/PDFContext.tsx

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ import {
3636
} from '@/utils/pdf';
3737

3838
import type { PDFDocumentProxy } from 'pdfjs-dist';
39-
import { useParams } from 'next/navigation';
4039

4140
/**
4241
* Interface defining all available methods and properties in the PDF context
@@ -234,7 +233,7 @@ export function PDFProvider({ children }: { children: ReactNode }) {
234233
text: text.trim(),
235234
voice: voice,
236235
speed: voiceSpeed,
237-
format: 'aac'
236+
format: 'audiobook' // Request WAV format directly
238237
}),
239238
signal
240239
});

0 commit comments

Comments
 (0)