Further audiobook extraction optimizations

richardr1126 · richardr1126 · commit e3370f752644 · 2025-02-26T04:03:05.000-07:00
diff --git a/README.md b/README.md
@@ -42,7 +42,7 @@ https://github.com/user-attachments/assets/262b9a01-c608-4fee-893c-9461dd48c99b
 docker run --name openreader-webui \
   -p 3003:3003 \
   -v openreader_docstore:/app/docstore \
-  richardr1126/openreader-webui:v0.2.2-alpine
+  richardr1126/openreader-webui:latest
 ```
 
 (Optionally): Set the TTS `API_BASE` URL and/or `API_KEY` to be default for all devices
@@ -51,7 +51,7 @@ docker run --name openreader-webui \
   -e API_BASE=http://host.docker.internal:8880/v1 \
   -p 3003:3003 \
   -v openreader_docstore:/app/docstore \
-  richardr1126/openreader-webui:v0.2.2-alpine
+  richardr1126/openreader-webui:latest
 ```
 
 > Requesting audio from the TTS API happens on the Next.js server not the client. So the base URL for the TTS API should be accessible and relative to the Next.js server. If it is in a Docker you may need to use `host.docker.internal` to access the host machine, instead of `localhost`.
diff --git a/src/app/api/audio/convert/route.ts b/src/app/api/audio/convert/route.ts
@@ -108,7 +108,7 @@ export async function POST(request: NextRequest) {
     for (let i = 0; i < data.chapters.length; i++) {
       const chapter = data.chapters[i];
       const inputPath = join(intermediateDir, `${i}-input.mp3`);
-      const outputPath = join(intermediateDir, `${i}.wav`);
+      const outputPath = join(intermediateDir, `${i}.aac`);
       
       tempFiles.push(inputPath, outputPath);
 
@@ -124,12 +124,10 @@ export async function POST(request: NextRequest) {
       await writeFile(inputPath, Buffer.concat(chunks));
       chunks.length = 0; // Clear chunks array
 
-      // Convert to WAV with consistent format
+      // Copy to AAC format for compatibility with M4B
       await runFFmpeg([
         '-i', inputPath,
-        '-acodec', 'pcm_s16le',
-        '-ar', '44100',
-        '-ac', '2',
+        '-c:a', 'copy', // Use copy instead of re-encoding
         outputPath
       ]);
       
@@ -177,16 +175,18 @@ export async function POST(request: NextRequest) {
       chapterFiles.map(f => `file '${f.path}'`).join('\n')
     );
 
-    // Combine all files into a single M4B
+    // Combine all files into a single M4B with optimized settings
     await runFFmpeg([
       '-f', 'concat',
       '-safe', '0',
       '-i', listPath,
       '-i', metadataPath,
       '-map_metadata', '1',
-      '-c:a', 'aac',
-      '-b:a', '192k',
+      '-c:a', 'copy', // Stream-copy the chapter audio as-is; no AAC re-encoding happens here
+      //'-b:a', '192k',
+      '-threads', '0', // 0 lets FFmpeg pick the thread count automatically
       '-movflags', '+faststart',
+      '-preset', 'ultrafast', // NOTE: likely has no effect while '-c:a' is 'copy' (nothing is encoded)
       outputPath
     ]);
 
diff --git a/src/app/api/tts/route.ts b/src/app/api/tts/route.ts
@@ -6,8 +6,8 @@ export async function POST(req: NextRequest) {
     // Get API credentials from headers or fall back to environment variables
     const openApiKey = req.headers.get('x-openai-key') || process.env.API_KEY || 'none';
     const openApiBaseUrl = req.headers.get('x-openai-base-url') || process.env.API_BASE;
-    const { text, voice, speed } = await req.json();
-    console.log('Received TTS request:', text, voice, speed);
+    const { text, voice, speed, format } = await req.json();
+    console.log('Received TTS request:', text, voice, speed, format);
 
     if (!openApiKey) {
       return NextResponse.json({ error: 'Missing OpenAI API key' }, { status: 401 });
@@ -29,16 +29,18 @@ export async function POST(req: NextRequest) {
       voice: voice as "alloy",
       input: text,
       speed: speed,
-      response_format: 'mp3',  // Always use mp3 since we convert to WAV later if needed
-    }, { signal: req.signal });
+      response_format: format === 'aac' ? 'aac' : 'mp3',
+    }, { signal: req.signal }); // Pass the abort signal to OpenAI client
 
     // Get the audio data as array buffer
+    // This will also be aborted if the client cancels
     const stream = response.body;
 
     // Return audio data with appropriate headers
+    const contentType = format === 'aac' ? 'audio/aac' : 'audio/mpeg';
     return new NextResponse(stream, {
       headers: {
-        'Content-Type': 'audio/mpeg'
+        'Content-Type': contentType
       }
     });
   } catch (error) {
diff --git a/src/contexts/EPUBContext.tsx b/src/contexts/EPUBContext.tsx
@@ -144,39 +144,40 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
    * Extracts text content from the entire EPUB book
    * @returns {Promise<string[]>} Array of text content from each section
    */
-  const extractBookText = useCallback(async (): Promise<string[]> => {
+  const extractBookText = useCallback(async (): Promise<Array<{ text: string; href: string }>> => {
     try {
-      if (!bookRef.current || !bookRef.current.isOpen) return [''];
+      if (!bookRef.current || !bookRef.current.isOpen) return [{ text: '', href: '' }];
 
       const book = bookRef.current;
       const spine = book.spine;
-      const promises: Promise<string>[] = [];
+      const promises: Promise<{ text: string; href: string }>[] = [];
 
       spine.each((item: SpineItem) => {
         const url = item.href || '';
         if (!url) return;
+        //console.log('Extracting text from section:', item as SpineItem);
 
         const promise = book.load(url)
           .then((section) => (section as Document))
-          .then((section) => {
-            const textContent = section.body.textContent || '';
-            return textContent;
-          })
+          .then((section) => ({
+            text: section.body.textContent || '',
+            href: url
+          }))
           .catch((err) => {
             console.error(`Error loading section ${url}:`, err);
-            return '';
+            return { text: '', href: url };
           });
 
         promises.push(promise);
       });
 
       const textArray = await Promise.all(promises);
-      const filteredArray = textArray.filter(text => text.trim() !== '');
+      const filteredArray = textArray.filter(item => item.text.trim() !== '');
       console.log('Extracted entire EPUB text array:', filteredArray);
       return filteredArray;
     } catch (error) {
       console.error('Error extracting EPUB text:', error);
-      return [''];
+      return [{ text: '', href: '' }];
     }
   }, []);
 
@@ -189,29 +190,55 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
     format: 'mp3' | 'm4b' = 'mp3'
   ): Promise<ArrayBuffer> => {
     try {
-      const textArray = await extractBookText();
-      if (!textArray.length) throw new Error('No text content found in book');
+      const sections = await extractBookText();
+      if (!sections.length) throw new Error('No text content found in book');
 
-      // Calculate total text length for accurate progress tracking
-      const totalLength = textArray.reduce((sum, text) => sum + text.trim().length, 0);
+      // Calculate total length for accurate progress tracking
+      const totalLength = sections.reduce((sum, section) => sum + section.text.trim().length, 0);
       const audioChunks: { buffer: ArrayBuffer; title?: string; startTime: number }[] = [];
       let processedLength = 0;
       let currentTime = 0;
 
-      // Get TOC for chapter titles if available
+      // Get TOC for chapter titles
       const chapters = tocRef.current || [];
-      const spine = bookRef.current?.spine;
+      console.log('Chapter map:', chapters);
       
-      for (const text of textArray) {
+      // Create a map of section hrefs to their chapter titles
+      const sectionTitleMap = new Map<string, string>();
+      
+      // First, loop through all chapters to create the mapping
+      for (const chapter of chapters) {
+        if (!chapter.href) continue;
+        
+        const chapterBaseHref = chapter.href.split('#')[0];
+        const chapterTitle = chapter.label.trim();
+        
+        // For each chapter, find all matching sections
+        for (const section of sections) {
+          const sectionHref = section.href;
+          const sectionBaseHref = sectionHref.split('#')[0];
+          
+          // If this section matches this chapter, map it
+          if (sectionHref === chapter.href || sectionBaseHref === chapterBaseHref) {
+            sectionTitleMap.set(sectionHref, chapterTitle);
+          }
+        }
+      }
+      
+      console.log('Section to chapter title mapping:', sectionTitleMap);
+
+      // Process each section
+      for (let i = 0; i < sections.length; i++) {
         if (signal?.aborted) {
           const partialBuffer = await combineAudioChunks(audioChunks, format);
           return partialBuffer;
         }
 
-        try {
-          const trimmedText = text.trim();
-          if (!trimmedText) continue;
+        const section = sections[i];
+        const trimmedText = section.text.trim();
+        if (!trimmedText) continue;
 
+        try {
           const ttsResponse = await fetch('/api/tts', {
             method: 'POST',
             headers: {
@@ -222,7 +249,7 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
               text: trimmedText,
               voice: voice,
               speed: voiceSpeed,
-              format: 'audiobook'
+              format: format === 'm4b' ? 'aac' : 'mp3',
             }),
             signal
           });
@@ -236,27 +263,15 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
             throw new Error('Received empty audio buffer from TTS');
           }
 
-          // Find matching chapter title from TOC if available
-          let chapterTitle;
-          if (spine && chapters.length > 0) {
-            let spineIndex = processedLength;
-            let currentSpineHref: string | undefined;
-            
-            spine.each((item: SpineItem) => {
-              if (spineIndex === 0) {
-                currentSpineHref = item.href;
-              }
-              spineIndex--;
-            });
-
-            const matchingChapter = chapters.find(chapter => 
-              chapter.href && currentSpineHref?.includes(chapter.href)
-            );
-            chapterTitle = matchingChapter?.label || `Section ${processedLength + 1}`;
-          } else {
-            chapterTitle = `Section ${processedLength + 1}`;
+          // Get the chapter title from our pre-computed map
+          let chapterTitle = sectionTitleMap.get(section.href);
+          
+          // If no chapter title found, use index-based naming
+          if (!chapterTitle) {
+            chapterTitle = `Unknown Section - ${i + 1}`;
           }
 
+          console.log('Processed audiobook chapter title:', chapterTitle);
           audioChunks.push({
             buffer: audioBuffer,
             title: chapterTitle,
@@ -271,8 +286,6 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
           });
 
           currentTime += (audioBuffer.byteLength + 48000) / 48000;
-
-          // Update progress based on processed text length
           processedLength += trimmedText.length;
           onProgress((processedLength / totalLength) * 100);
 
diff --git a/src/contexts/PDFContext.tsx b/src/contexts/PDFContext.tsx
@@ -246,7 +246,7 @@ export function PDFProvider({ children }: { children: ReactNode }) {
               text,
               voice: voice,
               speed: voiceSpeed,
-              format: 'audiobook'
+              format: format === 'm4b' ? 'aac' : 'mp3'
             }),
             signal
           });