implement retry logic for TTS API calls and clean up code

richardr1126 · richardr1126 · commit ce9df9a5ab41 · 2025-03-03T23:03:52.000-07:00
diff --git a/playwright.config.ts b/playwright.config.ts
@@ -37,7 +37,7 @@ export default defineConfig({
 
   /* Run your local dev server before starting the tests */
   webServer: {
-    command: process.env.CI ? 'npm run build && npm run start' : 'npm run dev',
+    command: 'npm run build && npm run start',
     url: 'http://localhost:3003',
     reuseExistingServer: !process.env.CI,
     timeout: 120 * 1000,
diff --git a/src/app/api/tts/voices/route.ts b/src/app/api/tts/voices/route.ts
@@ -9,7 +9,7 @@ export async function GET(req: NextRequest) {
     const openApiBaseUrl = req.headers.get('x-openai-base-url') || process.env.API_BASE;
 
     // Request voices from OpenAI
-    const response = await fetch(`${openApiBaseUrl || 'https://api.openai.com/v1'}/audio/voices`, {
+    const response = await fetch(`${openApiBaseUrl}/audio/voices`, {
       headers: {
         'Authorization': `Bearer ${openApiKey}`,
         'Content-Type': 'application/json',
diff --git a/src/app/layout.tsx b/src/app/layout.tsx
@@ -50,7 +50,7 @@ const isDev = process.env.NEXT_PUBLIC_NODE_ENV !== 'production' || process.env.N
 
 export default function RootLayout({ children }: { children: ReactNode }) {
   return (
-    <html lang="en" suppressHydrationWarning>
+    <html lang="en">
       <head>
         <meta name="color-scheme" content="light dark" />
       </head>
diff --git a/src/contexts/EPUBContext.tsx b/src/contexts/EPUBContext.tsx
@@ -20,6 +20,7 @@ import { SpineItem } from 'epubjs/types/section';
 import { useParams } from 'next/navigation';
 import { useConfig } from './ConfigContext';
 import { combineAudioChunks } from '@/utils/audio';
+import { withRetry } from '@/utils/audio';
 
 interface EPUBContextType {
   currDocData: ArrayBuffer | undefined;
@@ -202,15 +203,14 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
 
       // Get TOC for chapter titles
       const chapters = tocRef.current || [];
-      console.log('Chapter map:', chapters);
+      console.log('Chapters:', chapters);
       
       // Create a map of section hrefs to their chapter titles
       const sectionTitleMap = new Map<string, string>();
       
       // First, loop through all chapters to create the mapping
       for (const chapter of chapters) {
         if (!chapter.href) continue;
-        
         const chapterBaseHref = chapter.href.split('#')[0];
         const chapterTitle = chapter.label.trim();
         
@@ -240,29 +240,40 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
         if (!trimmedText) continue;
 
         try {
-          const ttsResponse = await fetch('/api/tts', {
-            method: 'POST',
-            headers: {
-              'x-openai-key': apiKey,
-              'x-openai-base-url': baseUrl,
+          const audioBuffer = await withRetry(
+            async () => {
+              const ttsResponse = await fetch('/api/tts', {
+                method: 'POST',
+                headers: {
+                  'x-openai-key': apiKey,
+                  'x-openai-base-url': baseUrl,
+                },
+                body: JSON.stringify({
+                  text: trimmedText,
+                  voice: voice,
+                  speed: voiceSpeed,
+                  format: format === 'm4b' ? 'aac' : 'mp3',
+                }),
+                signal
+              });
+
+              if (!ttsResponse.ok) {
+                throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
+              }
+
+              const buffer = await ttsResponse.arrayBuffer();
+              if (buffer.byteLength === 0) {
+                throw new Error('Received empty audio buffer from TTS');
+              }
+              return buffer;
             },
-            body: JSON.stringify({
-              text: trimmedText,
-              voice: voice,
-              speed: voiceSpeed,
-              format: format === 'm4b' ? 'aac' : 'mp3',
-            }),
-            signal
-          });
-
-          if (!ttsResponse.ok) {
-            throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
-          }
-
-          const audioBuffer = await ttsResponse.arrayBuffer();
-          if (audioBuffer.byteLength === 0) {
-            throw new Error('Received empty audio buffer from TTS');
-          }
+            {
+              maxRetries: 2,
+              initialDelay: 5000,
+              maxDelay: 10000,
+              backoffFactor: 2
+            }
+          );
 
           // Get the chapter title from our pre-computed map
           let chapterTitle = sectionTitleMap.get(section.href);
diff --git a/src/contexts/PDFContext.tsx b/src/contexts/PDFContext.tsx
@@ -35,7 +35,7 @@ import {
 } from '@/utils/pdf';
 
 import type { PDFDocumentProxy } from 'pdfjs-dist';
-import { combineAudioChunks } from '@/utils/audio';
+import { combineAudioChunks, withRetry } from '@/utils/audio';
 
 /**
  * Interface defining all available methods and properties in the PDF context
@@ -235,29 +235,40 @@ export function PDFProvider({ children }: { children: ReactNode }) {
 
         const text = textPerPage[i];
         try {
-          const ttsResponse = await fetch('/api/tts', {
-            method: 'POST',
-            headers: {
-              'x-openai-key': apiKey,
-              'x-openai-base-url': baseUrl,
+          const audioBuffer = await withRetry(
+            async () => {
+              const ttsResponse = await fetch('/api/tts', {
+                method: 'POST',
+                headers: {
+                  'x-openai-key': apiKey,
+                  'x-openai-base-url': baseUrl,
+                },
+                body: JSON.stringify({
+                  text,
+                  voice: voice,
+                  speed: voiceSpeed,
+                  format: format === 'm4b' ? 'aac' : 'mp3'
+                }),
+                signal
+              });
+
+              if (!ttsResponse.ok) {
+                throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
+              }
+
+              const buffer = await ttsResponse.arrayBuffer();
+              if (buffer.byteLength === 0) {
+                throw new Error('Received empty audio buffer from TTS');
+              }
+              return buffer;
             },
-            body: JSON.stringify({
-              text,
-              voice: voice,
-              speed: voiceSpeed,
-              format: format === 'm4b' ? 'aac' : 'mp3'
-            }),
-            signal
-          });
-
-          if (!ttsResponse.ok) {
-            throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
-          }
-
-          const audioBuffer = await ttsResponse.arrayBuffer();
-          if (audioBuffer.byteLength === 0) {
-            throw new Error('Received empty audio buffer from TTS');
-          }
+            {
+              maxRetries: 3,
+              initialDelay: 1000,
+              maxDelay: 5000,
+              backoffFactor: 2
+            }
+          );
 
           audioChunks.push({
             buffer: audioBuffer,
@@ -273,8 +284,6 @@ export function PDFProvider({ children }: { children: ReactNode }) {
           });
 
           currentTime += (audioBuffer.byteLength + 48000) / 48000;
-
-          // Update progress based on processed text length
           processedLength += text.length;
           onProgress((processedLength / totalLength) * 100);
 
diff --git a/src/contexts/TTSContext.tsx b/src/contexts/TTSContext.tsx
@@ -35,6 +35,7 @@ import { useMediaSession } from '@/hooks/audio/useMediaSession';
 import { useAudioContext } from '@/hooks/audio/useAudioContext';
 import { getLastDocumentLocation } from '@/utils/indexedDB';
 import { useBackgroundState } from '@/hooks/audio/useBackgroundState';
+import { withRetry } from '@/utils/audio';
 
 // Media globals
 declare global {
@@ -409,31 +410,40 @@ export function TTSProvider({ children }: { children: ReactNode }) {
       const controller = new AbortController();
       activeAbortControllers.current.add(controller);
 
-      const response = await fetch('/api/tts', {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          'x-openai-key': openApiKey || '',
-          'x-openai-base-url': openApiBaseUrl || '',
+      const arrayBuffer = await withRetry(
+        async () => {
+          const response = await fetch('/api/tts', {
+            method: 'POST',
+            headers: {
+              'Content-Type': 'application/json',
+              'x-openai-key': openApiKey || '',
+              'x-openai-base-url': openApiBaseUrl || '',
+            },
+            body: JSON.stringify({
+              text: sentence,
+              voice: voice,
+              speed: speed,
+            }),
+            signal: controller.signal,
+          });
+
+          if (!response.ok) {
+            throw new Error('Failed to generate audio');
+          }
+
+          return response.arrayBuffer();
         },
-        body: JSON.stringify({
-          text: sentence,
-          voice: voice,
-          speed: speed,
-        }),
-        signal: controller.signal,
-      });
+        {
+          maxRetries: 3,
+          initialDelay: 1000,
+          maxDelay: 5000,
+          backoffFactor: 2
+        }
+      );
 
       // Remove the controller once the request is complete
       activeAbortControllers.current.delete(controller);
 
-      if (!response.ok) {
-        throw new Error('Failed to generate audio');
-      }
-
-      // Get the raw array buffer - no need to decode since it's already MP3
-      const arrayBuffer = await response.arrayBuffer();
-
       // Cache the array buffer
       audioCache.set(sentence, arrayBuffer);
 
diff --git a/src/utils/audio.ts b/src/utils/audio.ts
@@ -8,6 +8,55 @@ interface AudioChunk {
   startTime: number;
 }
 
+interface RetryOptions {
+  maxRetries?: number;    // Max number of times withRetry attempts the operation (default: 3)
+  initialDelay?: number;  // Wait in ms after the first failed attempt (default: 1000)
+  maxDelay?: number;      // Upper bound in ms on any single wait between attempts (default: 10000)
+  backoffFactor?: number; // Multiplier applied to the wait for each further failure (default: 2)
+}
+
+/**
+ * Runs `operation`, retrying with capped exponential backoff while it rejects.
+ * At least one attempt is made; an aborted call (`AbortError`) is rethrown at once.
+ * @param operation Async function to invoke; called again after each failure
+ * @param options Attempt limit and backoff timing
+ * @returns Promise resolving to the first successful result; rejects with the last error
+ */
+export const withRetry = async <T>(
+  operation: () => Promise<T>,
+  options: RetryOptions = {}
+): Promise<T> => {
+  const {
+    maxRetries = 3,
+    initialDelay = 1000,
+    maxDelay = 10000,
+    backoffFactor = 2
+  } = options;
+
+  // Clamp so the operation runs once even when maxRetries is 0 or negative.
+  const attempts = Math.max(1, maxRetries);
+  let lastError: Error = new Error('Operation failed after retries');
+
+  for (let attempt = 0; attempt < attempts; attempt++) {
+    try {
+      return await operation();
+    } catch (error) {
+      lastError = error instanceof Error ? error : new Error(String(error));
+
+      // Give up on cancellation or once the final attempt has failed.
+      if (lastError.name === 'AbortError' || attempt >= attempts - 1) {
+        break;
+      }
+
+      const delay = Math.min(initialDelay * Math.pow(backoffFactor, attempt), maxDelay);
+      console.log(`Retry attempt ${attempt + 1}/${maxRetries} failed. Retrying in ${delay}ms...`);
+      await new Promise(resolve => setTimeout(resolve, delay));
+    }
+  }
+
+  throw lastError;
+}
+
 /**
  * Combines audio chunks into a single audio file
  * @param audioChunks Array of audio chunks with metadata