Skip to content

Commit e3370f7

Browse files
committed
Further audiobook extraction optimizations
1 parent 568c9f2 commit e3370f7

File tree

5 files changed

+73
-58
lines changed

5 files changed

+73
-58
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ https://github.com/user-attachments/assets/262b9a01-c608-4fee-893c-9461dd48c99b
4242
docker run --name openreader-webui \
4343
-p 3003:3003 \
4444
-v openreader_docstore:/app/docstore \
45-
richardr1126/openreader-webui:v0.2.2-alpine
45+
richardr1126/openreader-webui:latest
4646
```
4747

4848
(Optional): Set the TTS `API_BASE` URL and/or `API_KEY` as the default for all devices
@@ -51,7 +51,7 @@ docker run --name openreader-webui \
5151
-e API_BASE=http://host.docker.internal:8880/v1 \
5252
-p 3003:3003 \
5353
-v openreader_docstore:/app/docstore \
54-
richardr1126/openreader-webui:v0.2.2-alpine
54+
richardr1126/openreader-webui:latest
5555
```
5656

5757
> Requests to the TTS API are made by the Next.js server, not the client, so the TTS API base URL must be reachable from the Next.js server. If the server runs in a Docker container, you may need to use `host.docker.internal` instead of `localhost` to reach the host machine.

src/app/api/audio/convert/route.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ export async function POST(request: NextRequest) {
108108
for (let i = 0; i < data.chapters.length; i++) {
109109
const chapter = data.chapters[i];
110110
const inputPath = join(intermediateDir, `${i}-input.mp3`);
111-
const outputPath = join(intermediateDir, `${i}.wav`);
111+
const outputPath = join(intermediateDir, `${i}.aac`);
112112

113113
tempFiles.push(inputPath, outputPath);
114114

@@ -124,12 +124,10 @@ export async function POST(request: NextRequest) {
124124
await writeFile(inputPath, Buffer.concat(chunks));
125125
chunks.length = 0; // Clear chunks array
126126

127-
// Convert to WAV with consistent format
127+
// Copy to AAC format for compatibility with M4B
128128
await runFFmpeg([
129129
'-i', inputPath,
130-
'-acodec', 'pcm_s16le',
131-
'-ar', '44100',
132-
'-ac', '2',
130+
'-c:a', 'copy', // Use copy instead of re-encoding
133131
outputPath
134132
]);
135133

@@ -177,16 +175,18 @@ export async function POST(request: NextRequest) {
177175
chapterFiles.map(f => `file '${f.path}'`).join('\n')
178176
);
179177

180-
// Combine all files into a single M4B
178+
// Combine all files into a single M4B with optimized settings
181179
await runFFmpeg([
182180
'-f', 'concat',
183181
'-safe', '0',
184182
'-i', listPath,
185183
'-i', metadataPath,
186184
'-map_metadata', '1',
187-
'-c:a', 'aac',
188-
'-b:a', '192k',
185+
'-c:a', 'copy', // Stream-copy the audio as-is (no re-encoding)
186+
//'-b:a', '192k',
187+
'-threads', '0', // Use maximum available threads
189188
'-movflags', '+faststart',
189+
'-preset', 'ultrafast', // Use fastest encoding preset
190190
outputPath
191191
]);
192192

src/app/api/tts/route.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ export async function POST(req: NextRequest) {
66
// Get API credentials from headers or fall back to environment variables
77
const openApiKey = req.headers.get('x-openai-key') || process.env.API_KEY || 'none';
88
const openApiBaseUrl = req.headers.get('x-openai-base-url') || process.env.API_BASE;
9-
const { text, voice, speed } = await req.json();
10-
console.log('Received TTS request:', text, voice, speed);
9+
const { text, voice, speed, format } = await req.json();
10+
console.log('Received TTS request:', text, voice, speed, format);
1111

1212
if (!openApiKey) {
1313
return NextResponse.json({ error: 'Missing OpenAI API key' }, { status: 401 });
@@ -29,16 +29,18 @@ export async function POST(req: NextRequest) {
2929
voice: voice as "alloy",
3030
input: text,
3131
speed: speed,
32-
response_format: 'mp3', // Always use mp3 since we convert to WAV later if needed
33-
}, { signal: req.signal });
32+
response_format: format === 'aac' ? 'aac' : 'mp3',
33+
}, { signal: req.signal }); // Pass the abort signal to OpenAI client
3434

3535
// Get the audio data as a stream from the response body
36+
// This will also be aborted if the client cancels
3637
const stream = response.body;
3738

3839
// Return audio data with appropriate headers
40+
const contentType = format === 'aac' ? 'audio/aac' : 'audio/mpeg';
3941
return new NextResponse(stream, {
4042
headers: {
41-
'Content-Type': 'audio/mpeg'
43+
'Content-Type': contentType
4244
}
4345
});
4446
} catch (error) {

src/contexts/EPUBContext.tsx

Lines changed: 55 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -144,39 +144,40 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
144144
* Extracts text content from the entire EPUB book
145145
* @returns {Promise<string[]>} Array of text content from each section
146146
*/
147-
const extractBookText = useCallback(async (): Promise<string[]> => {
147+
const extractBookText = useCallback(async (): Promise<Array<{ text: string; href: string }>> => {
148148
try {
149-
if (!bookRef.current || !bookRef.current.isOpen) return [''];
149+
if (!bookRef.current || !bookRef.current.isOpen) return [{ text: '', href: '' }];
150150

151151
const book = bookRef.current;
152152
const spine = book.spine;
153-
const promises: Promise<string>[] = [];
153+
const promises: Promise<{ text: string; href: string }>[] = [];
154154

155155
spine.each((item: SpineItem) => {
156156
const url = item.href || '';
157157
if (!url) return;
158+
//console.log('Extracting text from section:', item as SpineItem);
158159

159160
const promise = book.load(url)
160161
.then((section) => (section as Document))
161-
.then((section) => {
162-
const textContent = section.body.textContent || '';
163-
return textContent;
164-
})
162+
.then((section) => ({
163+
text: section.body.textContent || '',
164+
href: url
165+
}))
165166
.catch((err) => {
166167
console.error(`Error loading section ${url}:`, err);
167-
return '';
168+
return { text: '', href: url };
168169
});
169170

170171
promises.push(promise);
171172
});
172173

173174
const textArray = await Promise.all(promises);
174-
const filteredArray = textArray.filter(text => text.trim() !== '');
175+
const filteredArray = textArray.filter(item => item.text.trim() !== '');
175176
console.log('Extracted entire EPUB text array:', filteredArray);
176177
return filteredArray;
177178
} catch (error) {
178179
console.error('Error extracting EPUB text:', error);
179-
return [''];
180+
return [{ text: '', href: '' }];
180181
}
181182
}, []);
182183

@@ -189,29 +190,55 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
189190
format: 'mp3' | 'm4b' = 'mp3'
190191
): Promise<ArrayBuffer> => {
191192
try {
192-
const textArray = await extractBookText();
193-
if (!textArray.length) throw new Error('No text content found in book');
193+
const sections = await extractBookText();
194+
if (!sections.length) throw new Error('No text content found in book');
194195

195-
// Calculate total text length for accurate progress tracking
196-
const totalLength = textArray.reduce((sum, text) => sum + text.trim().length, 0);
196+
// Calculate total length for accurate progress tracking
197+
const totalLength = sections.reduce((sum, section) => sum + section.text.trim().length, 0);
197198
const audioChunks: { buffer: ArrayBuffer; title?: string; startTime: number }[] = [];
198199
let processedLength = 0;
199200
let currentTime = 0;
200201

201-
// Get TOC for chapter titles if available
202+
// Get TOC for chapter titles
202203
const chapters = tocRef.current || [];
203-
const spine = bookRef.current?.spine;
204+
console.log('Chapter map:', chapters);
204205

205-
for (const text of textArray) {
206+
// Create a map of section hrefs to their chapter titles
207+
const sectionTitleMap = new Map<string, string>();
208+
209+
// First, loop through all chapters to create the mapping
210+
for (const chapter of chapters) {
211+
if (!chapter.href) continue;
212+
213+
const chapterBaseHref = chapter.href.split('#')[0];
214+
const chapterTitle = chapter.label.trim();
215+
216+
// For each chapter, find all matching sections
217+
for (const section of sections) {
218+
const sectionHref = section.href;
219+
const sectionBaseHref = sectionHref.split('#')[0];
220+
221+
// If this section matches this chapter, map it
222+
if (sectionHref === chapter.href || sectionBaseHref === chapterBaseHref) {
223+
sectionTitleMap.set(sectionHref, chapterTitle);
224+
}
225+
}
226+
}
227+
228+
console.log('Section to chapter title mapping:', sectionTitleMap);
229+
230+
// Process each section
231+
for (let i = 0; i < sections.length; i++) {
206232
if (signal?.aborted) {
207233
const partialBuffer = await combineAudioChunks(audioChunks, format);
208234
return partialBuffer;
209235
}
210236

211-
try {
212-
const trimmedText = text.trim();
213-
if (!trimmedText) continue;
237+
const section = sections[i];
238+
const trimmedText = section.text.trim();
239+
if (!trimmedText) continue;
214240

241+
try {
215242
const ttsResponse = await fetch('/api/tts', {
216243
method: 'POST',
217244
headers: {
@@ -222,7 +249,7 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
222249
text: trimmedText,
223250
voice: voice,
224251
speed: voiceSpeed,
225-
format: 'audiobook'
252+
format: format === 'm4b' ? 'aac' : 'mp3',
226253
}),
227254
signal
228255
});
@@ -236,27 +263,15 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
236263
throw new Error('Received empty audio buffer from TTS');
237264
}
238265

239-
// Find matching chapter title from TOC if available
240-
let chapterTitle;
241-
if (spine && chapters.length > 0) {
242-
let spineIndex = processedLength;
243-
let currentSpineHref: string | undefined;
244-
245-
spine.each((item: SpineItem) => {
246-
if (spineIndex === 0) {
247-
currentSpineHref = item.href;
248-
}
249-
spineIndex--;
250-
});
251-
252-
const matchingChapter = chapters.find(chapter =>
253-
chapter.href && currentSpineHref?.includes(chapter.href)
254-
);
255-
chapterTitle = matchingChapter?.label || `Section ${processedLength + 1}`;
256-
} else {
257-
chapterTitle = `Section ${processedLength + 1}`;
266+
// Get the chapter title from our pre-computed map
267+
let chapterTitle = sectionTitleMap.get(section.href);
268+
269+
// If no chapter title found, use index-based naming
270+
if (!chapterTitle) {
271+
chapterTitle = `Unknown Section - ${i + 1}`;
258272
}
259273

274+
console.log('Processed audiobook chapter title:', chapterTitle);
260275
audioChunks.push({
261276
buffer: audioBuffer,
262277
title: chapterTitle,
@@ -271,8 +286,6 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
271286
});
272287

273288
currentTime += (audioBuffer.byteLength + 48000) / 48000;
274-
275-
// Update progress based on processed text length
276289
processedLength += trimmedText.length;
277290
onProgress((processedLength / totalLength) * 100);
278291

src/contexts/PDFContext.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ export function PDFProvider({ children }: { children: ReactNode }) {
246246
text,
247247
voice: voice,
248248
speed: voiceSpeed,
249-
format: 'audiobook'
249+
format: format === 'm4b' ? 'aac' : 'mp3'
250250
}),
251251
signal
252252
});

0 commit comments

Comments
 (0)