Skip to content

Commit 9357ddd

Browse files
committed
Implement PDF audiobook generation with configurable margins for text extraction
1 parent 92220de commit 9357ddd

File tree

2 files changed

+130
-8
lines changed

2 files changed

+130
-8
lines changed

src/components/DocumentSettings.tsx

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
'use client';
22

3-
import { Fragment, useState, useRef, useEffect } from 'react';
3+
import { Fragment, useState, useRef, useCallback, useEffect } from 'react';
44
import { Dialog, DialogPanel, Transition, TransitionChild, Listbox, ListboxButton, ListboxOptions, ListboxOption, Button } from '@headlessui/react';
55
import { useConfig, ViewType } from '@/contexts/ConfigContext';
66
import { ChevronUpDownIcon, CheckIcon } from '@/components/icons/Icons';
77
import { useEPUB } from '@/contexts/EPUBContext';
8+
import { usePDF } from '@/contexts/PDFContext';
89

910
const isDev = process.env.NEXT_PUBLIC_NODE_ENV !== 'production' || process.env.NODE_ENV == null;
1011

@@ -32,6 +33,7 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
3233
updateConfigKey
3334
} = useConfig();
3435
const { createFullAudioBook } = useEPUB();
36+
const { createFullAudioBook: createPDFAudioBook } = usePDF();
3537
const [progress, setProgress] = useState(0);
3638
const [isGenerating, setIsGenerating] = useState(false);
3739
const [localMargins, setLocalMargins] = useState({
@@ -70,13 +72,16 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
7072
}
7173
};
7274

73-
const handleStartGeneration = async () => {
75+
const handleStartGeneration = useCallback(async () => {
7476
setIsGenerating(true);
7577
setProgress(0);
7678
abortControllerRef.current = new AbortController();
7779

7880
try {
79-
const audioBuffer = await createFullAudioBook(
81+
const audioBuffer = epub ? await createFullAudioBook(
82+
(progress) => setProgress(progress),
83+
abortControllerRef.current.signal
84+
) : await createPDFAudioBook(
8085
(progress) => setProgress(progress),
8186
abortControllerRef.current.signal
8287
);
@@ -102,7 +107,7 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
102107
setProgress(0);
103108
abortControllerRef.current = null;
104109
}
105-
};
110+
}, [createFullAudioBook, createPDFAudioBook, epub]);
106111

107112
const handleCancel = () => {
108113
if (abortControllerRef.current) {
@@ -148,7 +153,7 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
148153
transform transition-transform duration-200 ease-in-out hover:scale-[1.04]"
149154
onClick={handleStartGeneration}
150155
>
151-
Export to Audiobook (experimental)
156+
Export to audiobook.mp3 (experimental)
152157
</Button>
153158
) : (
154159
<div className="space-y-2">
@@ -177,7 +182,7 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
177182
{!epub && <div className="space-y-6">
178183
<div className="mt-4 space-y-2">
179184
<label className="block text-sm font-medium text-foreground mb-4">
180-
Adjust extraction margins (experimental)
185+
Text extraction margins
181186
</label>
182187
<div className="grid grid-cols-1 sm:grid-cols-2 gap-2">
183188
{/* Header Margin */}
@@ -261,7 +266,7 @@ export function DocumentSettings({ isOpen, setIsOpen, epub }: DocViewSettingsPro
261266
</div>
262267
</div>
263268
<p className="text-xs text-muted mt-2">
264-
Adjust margins to exclude content from edges of the page during text extraction
269+
Adjust margins to exclude content from edges of the page during text extraction (experimental)
265270
</p>
266271
</div>
267272
<Listbox

src/contexts/PDFContext.tsx

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import {
3636
} from '@/utils/pdf';
3737

3838
import type { PDFDocumentProxy } from 'pdfjs-dist';
39+
import { useParams } from 'next/navigation';
3940

4041
/**
4142
* Interface defining all available methods and properties in the PDF context
@@ -62,6 +63,7 @@ interface PDFContextType {
6263
stopAndPlayFromIndex: (index: number) => void,
6364
isProcessing: boolean
6465
) => void;
66+
createFullAudioBook: (onProgress: (progress: number) => void, signal?: AbortSignal) => Promise<ArrayBuffer>;
6567
}
6668

6769
// Create the context
@@ -88,7 +90,11 @@ export function PDFProvider({ children }: { children: ReactNode }) {
8890
headerMargin,
8991
footerMargin,
9092
leftMargin,
91-
rightMargin
93+
rightMargin,
94+
apiKey,
95+
baseUrl,
96+
voiceSpeed,
97+
voice,
9298
} = useConfig();
9399

94100
// Current document state
@@ -177,6 +183,115 @@ export function PDFProvider({ children }: { children: ReactNode }) {
177183
stop();
178184
}, [setCurrDocPages, stop]);
179185

186+
/**
 * Builds an audiobook from the currently loaded PDF.
 *
 * Pages are processed one at a time, in order: the page text is extracted with
 * the configured header/footer/left/right margins, posted to `/api/tts`, and
 * the returned audio bytes are appended to the result. Pages whose extracted
 * text is blank are skipped. A page whose TTS request fails for a reason other
 * than cancellation is logged and skipped so one bad page does not discard the
 * rest of the book.
 *
 * @param onProgress - Called after every page (including blank and failed
 *   pages) with the percentage of pages handled so far, in the range 0–100.
 * @param signal - Optional cancellation signal. When it is aborted, the audio
 *   collected so far is returned instead of throwing (this may be an empty
 *   buffer if nothing was generated yet).
 * @returns The concatenated audio bytes of all successfully converted pages.
 * @throws Error if no PDF document is loaded, or if the loop completes without
 *   any page producing audio.
 */
const createFullAudioBook = useCallback(async (
  onProgress: (progress: number) => void,
  signal?: AbortSignal
): Promise<ArrayBuffer> => {
  try {
    if (!pdfDocument) {
      throw new Error('No PDF document loaded');
    }

    // Audio for every converted page, in page order, with padding in between
    const audioChunks: ArrayBuffer[] = [];
    const totalPages = pdfDocument.numPages;

    // The margins do not change while the loop runs, so build the options once
    const margins = {
      header: headerMargin,
      footer: footerMargin,
      left: leftMargin,
      right: rightMargin
    };

    for (let pageNum = 1; pageNum <= totalPages; pageNum++) {
      // Cancelled between pages: hand back whatever has been generated so far
      if (signal?.aborted) {
        return combineAudioChunks(audioChunks);
      }

      const text = await extractTextFromPDF(pdfDocument, pageNum, margins);
      const pageText = text.trim();

      // Blank pages produce no audio but must still count towards progress
      if (pageText) {
        try {
          const ttsResponse = await fetch('/api/tts', {
            method: 'POST',
            headers: {
              // The body is JSON, so declare it as such
              'Content-Type': 'application/json',
              'x-openai-key': apiKey,
              'x-openai-base-url': baseUrl,
            },
            body: JSON.stringify({
              text: pageText,
              voice: voice,
              speed: voiceSpeed,
            }),
            signal
          });

          if (!ttsResponse.ok) {
            throw new Error(`TTS processing failed with status ${ttsResponse.status}`);
          }

          const audioBuffer = await ttsResponse.arrayBuffer();
          if (audioBuffer.byteLength === 0) {
            throw new Error('Received empty audio buffer from TTS');
          }

          audioChunks.push(audioBuffer);

          // Zero-filled padding between pages.
          // NOTE(review): this was originally described as "1s of silence";
          // how 48000 zero bytes play back depends on the audio format
          // returned by the TTS endpoint — confirm.
          audioChunks.push(new ArrayBuffer(48000));
        } catch (error) {
          // Treat the request as cancelled if the signal says so, even when the
          // rejection is not a plain AbortError (e.g. a custom abort reason)
          const wasAborted =
            signal?.aborted === true ||
            (error instanceof Error && error.name === 'AbortError');
          if (wasAborted) {
            console.log('TTS request aborted');
            return combineAudioChunks(audioChunks);
          }
          // Best effort: log the failure and carry on with the next page
          console.error('Error processing page:', error);
        }
      }

      // Exactly one page has been handled per iteration, so pageNum is the count
      onProgress((pageNum / totalPages) * 100);
    }

    if (audioChunks.length === 0) {
      throw new Error('No audio was generated from the PDF content');
    }

    return combineAudioChunks(audioChunks);
  } catch (error) {
    console.error('Error creating audiobook:', error);
    throw error;
  }
}, [pdfDocument, headerMargin, footerMargin, leftMargin, rightMargin, apiKey, baseUrl, voice, voiceSpeed]);
280+
281+
/**
 * Concatenates audio chunks, in the order given, into one contiguous buffer.
 *
 * The input array and its buffers are only read, never modified, so immutable
 * arrays are accepted as well.
 *
 * @param audioChunks - Buffers to join; zero-length buffers contribute nothing.
 * @returns A new buffer whose length is the sum of all chunk lengths (an empty
 *   buffer when `audioChunks` is empty).
 */
const combineAudioChunks = (audioChunks: readonly ArrayBuffer[]): ArrayBuffer => {
  const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
  const combined = new Uint8Array(totalLength);

  // Copy each chunk to the position immediately after the previous one
  let writeOffset = 0;
  for (const chunk of audioChunks) {
    combined.set(new Uint8Array(chunk), writeOffset);
    writeOffset += chunk.byteLength;
  }

  return combined.buffer;
};
294+
180295
// Context value memoization
181296
const contextValue = useMemo(
182297
() => ({
@@ -192,6 +307,7 @@ export function PDFProvider({ children }: { children: ReactNode }) {
192307
clearHighlights,
193308
handleTextClick,
194309
pdfDocument,
310+
createFullAudioBook,
195311
}),
196312
[
197313
onDocumentLoadSuccess,
@@ -203,6 +319,7 @@ export function PDFProvider({ children }: { children: ReactNode }) {
203319
currDocText,
204320
clearCurrDoc,
205321
pdfDocument,
322+
createFullAudioBook,
206323
]
207324
);
208325

0 commit comments

Comments
 (0)