Skip to content

Commit ce9df9a

Browse files
committed
implement retry logic for TTS API calls and clean up code
1 parent 2f2442e commit ce9df9a

File tree

7 files changed

+151
-72
lines changed

7 files changed

+151
-72
lines changed

playwright.config.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export default defineConfig({
3737

3838
/* Run your local dev server before starting the tests */
3939
webServer: {
40-
command: process.env.CI ? 'npm run build && npm run start' : 'npm run dev',
40+
command: 'npm run build && npm run start',
4141
url: 'http://localhost:3003',
4242
reuseExistingServer: !process.env.CI,
4343
timeout: 120 * 1000,

src/app/api/tts/voices/route.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export async function GET(req: NextRequest) {
99
const openApiBaseUrl = req.headers.get('x-openai-base-url') || process.env.API_BASE;
1010

1111
// Request voices from OpenAI
12-
const response = await fetch(`${openApiBaseUrl || 'https://api.openai.com/v1'}/audio/voices`, {
12+
const response = await fetch(`${openApiBaseUrl}/audio/voices`, {
1313
headers: {
1414
'Authorization': `Bearer ${openApiKey}`,
1515
'Content-Type': 'application/json',

src/app/layout.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ const isDev = process.env.NEXT_PUBLIC_NODE_ENV !== 'production' || process.env.N
5050

5151
export default function RootLayout({ children }: { children: ReactNode }) {
5252
return (
53-
<html lang="en" suppressHydrationWarning>
53+
<html lang="en">
5454
<head>
5555
<meta name="color-scheme" content="light dark" />
5656
</head>

src/contexts/EPUBContext.tsx

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import { SpineItem } from 'epubjs/types/section';
2020
import { useParams } from 'next/navigation';
2121
import { useConfig } from './ConfigContext';
2222
import { combineAudioChunks } from '@/utils/audio';
23+
import { withRetry } from '@/utils/audio';
2324

2425
interface EPUBContextType {
2526
currDocData: ArrayBuffer | undefined;
@@ -202,15 +203,14 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
202203

203204
// Get TOC for chapter titles
204205
const chapters = tocRef.current || [];
205-
console.log('Chapter map:', chapters);
206+
console.log('Chapters:', chapters);
206207

207208
// Create a map of section hrefs to their chapter titles
208209
const sectionTitleMap = new Map<string, string>();
209210

210211
// First, loop through all chapters to create the mapping
211212
for (const chapter of chapters) {
212213
if (!chapter.href) continue;
213-
214214
const chapterBaseHref = chapter.href.split('#')[0];
215215
const chapterTitle = chapter.label.trim();
216216

@@ -240,29 +240,40 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
240240
if (!trimmedText) continue;
241241

242242
try {
243-
const ttsResponse = await fetch('/api/tts', {
244-
method: 'POST',
245-
headers: {
246-
'x-openai-key': apiKey,
247-
'x-openai-base-url': baseUrl,
243+
const audioBuffer = await withRetry(
244+
async () => {
245+
const ttsResponse = await fetch('/api/tts', {
246+
method: 'POST',
247+
headers: {
248+
'x-openai-key': apiKey,
249+
'x-openai-base-url': baseUrl,
250+
},
251+
body: JSON.stringify({
252+
text: trimmedText,
253+
voice: voice,
254+
speed: voiceSpeed,
255+
format: format === 'm4b' ? 'aac' : 'mp3',
256+
}),
257+
signal
258+
});
259+
260+
if (!ttsResponse.ok) {
261+
throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
262+
}
263+
264+
const buffer = await ttsResponse.arrayBuffer();
265+
if (buffer.byteLength === 0) {
266+
throw new Error('Received empty audio buffer from TTS');
267+
}
268+
return buffer;
248269
},
249-
body: JSON.stringify({
250-
text: trimmedText,
251-
voice: voice,
252-
speed: voiceSpeed,
253-
format: format === 'm4b' ? 'aac' : 'mp3',
254-
}),
255-
signal
256-
});
257-
258-
if (!ttsResponse.ok) {
259-
throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
260-
}
261-
262-
const audioBuffer = await ttsResponse.arrayBuffer();
263-
if (audioBuffer.byteLength === 0) {
264-
throw new Error('Received empty audio buffer from TTS');
265-
}
270+
{
271+
maxRetries: 2,
272+
initialDelay: 5000,
273+
maxDelay: 10000,
274+
backoffFactor: 2
275+
}
276+
);
266277

267278
// Get the chapter title from our pre-computed map
268279
let chapterTitle = sectionTitleMap.get(section.href);

src/contexts/PDFContext.tsx

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import {
3535
} from '@/utils/pdf';
3636

3737
import type { PDFDocumentProxy } from 'pdfjs-dist';
38-
import { combineAudioChunks } from '@/utils/audio';
38+
import { combineAudioChunks, withRetry } from '@/utils/audio';
3939

4040
/**
4141
* Interface defining all available methods and properties in the PDF context
@@ -235,29 +235,40 @@ export function PDFProvider({ children }: { children: ReactNode }) {
235235

236236
const text = textPerPage[i];
237237
try {
238-
const ttsResponse = await fetch('/api/tts', {
239-
method: 'POST',
240-
headers: {
241-
'x-openai-key': apiKey,
242-
'x-openai-base-url': baseUrl,
238+
const audioBuffer = await withRetry(
239+
async () => {
240+
const ttsResponse = await fetch('/api/tts', {
241+
method: 'POST',
242+
headers: {
243+
'x-openai-key': apiKey,
244+
'x-openai-base-url': baseUrl,
245+
},
246+
body: JSON.stringify({
247+
text,
248+
voice: voice,
249+
speed: voiceSpeed,
250+
format: format === 'm4b' ? 'aac' : 'mp3'
251+
}),
252+
signal
253+
});
254+
255+
if (!ttsResponse.ok) {
256+
throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
257+
}
258+
259+
const buffer = await ttsResponse.arrayBuffer();
260+
if (buffer.byteLength === 0) {
261+
throw new Error('Received empty audio buffer from TTS');
262+
}
263+
return buffer;
243264
},
244-
body: JSON.stringify({
245-
text,
246-
voice: voice,
247-
speed: voiceSpeed,
248-
format: format === 'm4b' ? 'aac' : 'mp3'
249-
}),
250-
signal
251-
});
252-
253-
if (!ttsResponse.ok) {
254-
throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
255-
}
256-
257-
const audioBuffer = await ttsResponse.arrayBuffer();
258-
if (audioBuffer.byteLength === 0) {
259-
throw new Error('Received empty audio buffer from TTS');
260-
}
265+
{
266+
maxRetries: 3,
267+
initialDelay: 1000,
268+
maxDelay: 5000,
269+
backoffFactor: 2
270+
}
271+
);
261272

262273
audioChunks.push({
263274
buffer: audioBuffer,
@@ -273,8 +284,6 @@ export function PDFProvider({ children }: { children: ReactNode }) {
273284
});
274285

275286
currentTime += (audioBuffer.byteLength + 48000) / 48000;
276-
277-
// Update progress based on processed text length
278287
processedLength += text.length;
279288
onProgress((processedLength / totalLength) * 100);
280289

src/contexts/TTSContext.tsx

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import { useMediaSession } from '@/hooks/audio/useMediaSession';
3535
import { useAudioContext } from '@/hooks/audio/useAudioContext';
3636
import { getLastDocumentLocation } from '@/utils/indexedDB';
3737
import { useBackgroundState } from '@/hooks/audio/useBackgroundState';
38+
import { withRetry } from '@/utils/audio';
3839

3940
// Media globals
4041
declare global {
@@ -409,31 +410,40 @@ export function TTSProvider({ children }: { children: ReactNode }) {
409410
const controller = new AbortController();
410411
activeAbortControllers.current.add(controller);
411412

412-
const response = await fetch('/api/tts', {
413-
method: 'POST',
414-
headers: {
415-
'Content-Type': 'application/json',
416-
'x-openai-key': openApiKey || '',
417-
'x-openai-base-url': openApiBaseUrl || '',
413+
const arrayBuffer = await withRetry(
414+
async () => {
415+
const response = await fetch('/api/tts', {
416+
method: 'POST',
417+
headers: {
418+
'Content-Type': 'application/json',
419+
'x-openai-key': openApiKey || '',
420+
'x-openai-base-url': openApiBaseUrl || '',
421+
},
422+
body: JSON.stringify({
423+
text: sentence,
424+
voice: voice,
425+
speed: speed,
426+
}),
427+
signal: controller.signal,
428+
});
429+
430+
if (!response.ok) {
431+
throw new Error('Failed to generate audio');
432+
}
433+
434+
return response.arrayBuffer();
418435
},
419-
body: JSON.stringify({
420-
text: sentence,
421-
voice: voice,
422-
speed: speed,
423-
}),
424-
signal: controller.signal,
425-
});
436+
{
437+
maxRetries: 3,
438+
initialDelay: 1000,
439+
maxDelay: 5000,
440+
backoffFactor: 2
441+
}
442+
);
426443

427444
// Remove the controller once the request is complete
428445
activeAbortControllers.current.delete(controller);
429446

430-
if (!response.ok) {
431-
throw new Error('Failed to generate audio');
432-
}
433-
434-
// Get the raw array buffer - no need to decode since it's already MP3
435-
const arrayBuffer = await response.arrayBuffer();
436-
437447
// Cache the array buffer
438448
audioCache.set(sentence, arrayBuffer);
439449

src/utils/audio.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,55 @@ interface AudioChunk {
88
startTime: number;
99
}
1010

11+
interface RetryOptions {
12+
maxRetries?: number;
13+
initialDelay?: number;
14+
maxDelay?: number;
15+
backoffFactor?: number;
16+
}
17+
18+
/**
19+
* Executes a function with exponential backoff retry logic
20+
* @param operation Function to retry
21+
* @param options Retry configuration options
22+
* @returns Promise resolving to the operation result
23+
*/
24+
export const withRetry = async <T>(
25+
operation: () => Promise<T>,
26+
options: RetryOptions = {}
27+
): Promise<T> => {
28+
const {
29+
maxRetries = 3,
30+
initialDelay = 1000,
31+
maxDelay = 10000,
32+
backoffFactor = 2
33+
} = options;
34+
35+
let lastError: Error | null = null;
36+
37+
for (let attempt = 0; attempt < maxRetries; attempt++) {
38+
try {
39+
return await operation();
40+
} catch (error) {
41+
lastError = error instanceof Error ? error : new Error(String(error));
42+
43+
if (attempt === maxRetries - 1) {
44+
break;
45+
}
46+
47+
const delay = Math.min(
48+
initialDelay * Math.pow(backoffFactor, attempt),
49+
maxDelay
50+
);
51+
52+
console.log(`Retry attempt ${attempt + 1}/${maxRetries} failed. Retrying in ${delay}ms...`);
53+
await new Promise(resolve => setTimeout(resolve, delay));
54+
}
55+
}
56+
57+
throw lastError || new Error('Operation failed after retries');
58+
}
59+
1160
/**
1261
* Combines audio chunks into a single audio file
1362
* @param audioChunks Array of audio chunks with metadata

0 commit comments

Comments
 (0)