Skip to content

Commit 733b418

Browse files
committed
feat(tts): add word-by-word highlighting with whisper.cpp
- Introduce `/api/whisper` endpoint which uses `whisper.cpp` (via a `WHISPER_CPP_BIN` executable) and `ffmpeg` to generate word-level audio alignments from provided audio and text.
- Integrate word-level alignments into `TTSContext`, tracking the currently spoken word based on audio seek position and provided timestamps. Alignments are cached in-memory and fetched asynchronously.
- Add new configuration options (`pdfWordHighlightEnabled`, `epubWordHighlightEnabled`) to `ConfigContext` and `Dexie` for enabling/disabling the feature.
- Implement visual word highlighting in both `PDFViewer` and `EPUBViewer` by mapping TTS-aligned words to rendered text elements.
- Enhance `EPUBContext` and `PDFContext` with new `highlightWordIndex` and `clearWordHighlights` functions, utilizing fuzzy string matching (`cmpstr`) to robustly align spoken words with displayed text for accurate highlighting.
- Update `DocumentSettings` to include user-facing toggles for the new highlighting modes.
1 parent ac0b47d commit 733b418

File tree

12 files changed

+1336
-243
lines changed

12 files changed

+1336
-243
lines changed

src/app/api/whisper/route.ts

Lines changed: 451 additions & 0 deletions
Large diffs are not rendered by default.

src/components/DocumentSettings.tsx

Lines changed: 71 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ export function DocumentSettings({ isOpen, setIsOpen, epub, html }: {
3535
updateConfigKey,
3636
pdfHighlightEnabled,
3737
epubHighlightEnabled,
38+
pdfWordHighlightEnabled,
39+
epubWordHighlightEnabled,
3840
} = useConfig();
3941
const { createFullAudioBook: createEPUBAudioBook, regenerateChapter: regenerateEPUBChapter } = useEPUB();
4042
const { createFullAudioBook: createPDFAudioBook, regenerateChapter: regeneratePDFChapter } = usePDF();
@@ -324,40 +326,82 @@ export function DocumentSettings({ isOpen, setIsOpen, epub, html }: {
324326
</span>
325327
</label>
326328
<p className="text-sm text-muted pl-6">
327-
Merge sentences across page or section breaks for smoother TTS.
329+
Merge sentences across page or section breaks
328330
</p>
329331
</div>
330332
)}
331333
{!epub && !html && (
332-
<div className="space-y-1">
333-
<label className="flex items-center space-x-2">
334-
<input
335-
type="checkbox"
336-
checked={pdfHighlightEnabled}
337-
onChange={(e) => updateConfigKey('pdfHighlightEnabled', e.target.checked)}
338-
className="form-checkbox h-4 w-4 text-accent rounded border-muted"
339-
/>
340-
<span className="text-sm font-medium text-foreground">Highlight text during playback</span>
341-
</label>
342-
<p className="text-sm text-muted pl-6">
343-
Show visual highlighting in the PDF viewer while TTS is reading.
344-
</p>
334+
<div className="space-y-2">
335+
<div className="space-y-1">
336+
<label className="flex items-center space-x-2">
337+
<input
338+
type="checkbox"
339+
checked={pdfHighlightEnabled}
340+
onChange={(e) => updateConfigKey('pdfHighlightEnabled', e.target.checked)}
341+
className="form-checkbox h-4 w-4 text-accent rounded border-muted"
342+
/>
343+
<span className="text-sm font-medium text-foreground">Highlight text during playback</span>
344+
</label>
345+
<p className="text-sm text-muted pl-6">
346+
Visual text playback highlighting in the PDF viewer
347+
</p>
348+
</div>
349+
<div className="space-y-1 pl-6">
350+
<label className="flex items-center space-x-2">
351+
<input
352+
type="checkbox"
353+
checked={pdfWordHighlightEnabled && pdfHighlightEnabled}
354+
disabled={!pdfHighlightEnabled}
355+
onChange={(e) =>
356+
updateConfigKey('pdfWordHighlightEnabled', e.target.checked)
357+
}
358+
className="form-checkbox h-4 w-4 text-accent rounded border-muted disabled:opacity-50"
359+
/>
360+
<span className="text-sm font-medium text-foreground">
361+
Word-by-word
362+
</span>
363+
</label>
364+
<p className="text-sm text-muted pl-6">
365+
Highlight individual words using audio timestamps generated by whisper.cpp
366+
</p>
367+
</div>
345368
</div>
346369
)}
347370
{epub && (
348-
<div className="space-y-1">
349-
<label className="flex items-center space-x-2">
350-
<input
351-
type="checkbox"
352-
checked={epubHighlightEnabled}
353-
onChange={(e) => updateConfigKey('epubHighlightEnabled', e.target.checked)}
354-
className="form-checkbox h-4 w-4 text-accent rounded border-muted"
355-
/>
356-
<span className="text-sm font-medium text-foreground">Highlight text during playback</span>
357-
</label>
358-
<p className="text-sm text-muted pl-6">
359-
Show visual highlighting in the EPUB viewer while TTS is reading.
360-
</p>
371+
<div className="space-y-2">
372+
<div className="space-y-1">
373+
<label className="flex items-center space-x-2">
374+
<input
375+
type="checkbox"
376+
checked={epubHighlightEnabled}
377+
onChange={(e) => updateConfigKey('epubHighlightEnabled', e.target.checked)}
378+
className="form-checkbox h-4 w-4 text-accent rounded border-muted"
379+
/>
380+
<span className="text-sm font-medium text-foreground">Highlight text during playback</span>
381+
</label>
382+
<p className="text-sm text-muted pl-6">
383+
Visual text playback highlighting in the EPUB viewer
384+
</p>
385+
</div>
386+
<div className="space-y-1 pl-6">
387+
<label className="flex items-center space-x-2">
388+
<input
389+
type="checkbox"
390+
checked={epubWordHighlightEnabled && epubHighlightEnabled}
391+
disabled={!epubHighlightEnabled}
392+
onChange={(e) =>
393+
updateConfigKey('epubWordHighlightEnabled', e.target.checked)
394+
}
395+
className="form-checkbox h-4 w-4 text-accent rounded border-muted disabled:opacity-50"
396+
/>
397+
<span className="text-sm font-medium text-foreground">
398+
Word-by-word
399+
</span>
400+
</label>
401+
<p className="text-sm text-muted pl-6">
402+
Highlight individual words using audio timestamps generated by whisper.cpp
403+
</p>
404+
</div>
361405
</div>
362406
)}
363407
{epub && (

src/components/EPUBViewer.tsx

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,18 @@ export function EPUBViewer({ className = '' }: EPUBViewerProps) {
3030
setRendition,
3131
extractPageText,
3232
highlightPattern,
33-
clearHighlights
33+
clearHighlights,
34+
highlightWordIndex,
35+
clearWordHighlights
3436
} = useEPUB();
35-
const { registerLocationChangeHandler, pause, currentSentence } = useTTS();
36-
const { epubTheme } = useConfig();
37+
const {
38+
registerLocationChangeHandler,
39+
pause,
40+
currentSentence,
41+
currentSentenceAlignment,
42+
currentWordIndex
43+
} = useTTS();
44+
const { epubTheme, epubHighlightEnabled, epubWordHighlightEnabled } = useConfig();
3745
const { updateTheme } = useEPUBTheme(epubTheme, renditionRef.current);
3846
const containerRef = useRef<HTMLDivElement>(null);
3947
const { isResizing, setIsResizing, dimensions } = useEPUBResize(containerRef);
@@ -70,6 +78,38 @@ export function EPUBViewer({ className = '' }: EPUBViewerProps) {
7078
}
7179
}, [currentSentence, highlightPattern, clearHighlights]);
7280

81+
// Word-level highlight layered on top of the block highlight
82+
useEffect(() => {
83+
if (!epubHighlightEnabled || !epubWordHighlightEnabled) {
84+
clearWordHighlights();
85+
return;
86+
}
87+
88+
if (currentWordIndex === null || currentWordIndex === undefined || currentWordIndex < 0) {
89+
clearWordHighlights();
90+
return;
91+
}
92+
93+
if (!currentSentenceAlignment) {
94+
clearWordHighlights();
95+
return;
96+
}
97+
98+
highlightWordIndex(
99+
currentSentenceAlignment,
100+
currentWordIndex,
101+
currentSentence || ''
102+
);
103+
}, [
104+
currentWordIndex,
105+
currentSentence,
106+
currentSentenceAlignment,
107+
epubHighlightEnabled,
108+
epubWordHighlightEnabled,
109+
clearWordHighlights,
110+
highlightWordIndex
111+
]);
112+
73113
if (!currDocData) {
74114
return <DocumentSkeleton />;
75115
}
@@ -95,4 +135,4 @@ export function EPUBViewer({ className = '' }: EPUBViewerProps) {
95135
</div>
96136
</div>
97137
);
98-
}
138+
}

src/components/PDFViewer.tsx

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,22 @@ export function PDFViewer({ zoomLevel }: PDFViewerProps) {
2626
const { containerWidth } = usePDFResize(containerRef);
2727

2828
// Config context
29-
const { viewType, pdfHighlightEnabled } = useConfig();
29+
const { viewType, pdfHighlightEnabled, pdfWordHighlightEnabled } = useConfig();
3030

3131
// TTS context
3232
const {
3333
currentSentence,
34+
currentWordIndex,
35+
currentSentenceAlignment,
3436
skipToLocation,
3537
} = useTTS();
3638

3739
// PDF context
3840
const {
3941
highlightPattern,
4042
clearHighlights,
43+
clearWordHighlights,
44+
highlightWordIndex,
4145
onDocumentLoadSuccess,
4246
currDocData,
4347
currDocPages,
@@ -74,6 +78,46 @@ export function PDFViewer({ zoomLevel }: PDFViewerProps) {
7478
};
7579
}, [currDocText, currentSentence, highlightPattern, clearHighlights, pdfHighlightEnabled]);
7680

81+
// Word-level highlight layered on top of the block highlight
82+
useEffect(() => {
83+
if (!pdfHighlightEnabled || !pdfWordHighlightEnabled) {
84+
clearWordHighlights();
85+
return;
86+
}
87+
88+
if (currentWordIndex === null || currentWordIndex === undefined || currentWordIndex < 0) {
89+
clearWordHighlights();
90+
return;
91+
}
92+
93+
const wordEntry =
94+
currentSentenceAlignment &&
95+
currentWordIndex < currentSentenceAlignment.words.length
96+
? currentSentenceAlignment.words[currentWordIndex]
97+
: undefined;
98+
const wordText = wordEntry?.text || null;
99+
100+
if (!wordText) {
101+
clearWordHighlights();
102+
return;
103+
}
104+
105+
highlightWordIndex(
106+
currentSentenceAlignment,
107+
currentWordIndex,
108+
currentSentence || '',
109+
containerRef as RefObject<HTMLDivElement>
110+
);
111+
}, [
112+
currentWordIndex,
113+
currentSentence,
114+
currentSentenceAlignment,
115+
pdfHighlightEnabled,
116+
pdfWordHighlightEnabled,
117+
clearWordHighlights,
118+
highlightWordIndex
119+
]);
120+
77121
// Add page dimensions state
78122
const [pageWidth, setPageWidth] = useState<number>(595); // default A4 width
79123
const [pageHeight, setPageHeight] = useState<number>(842); // default A4 height

src/contexts/ConfigContext.tsx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ interface ConfigContextType {
3232
isLoading: boolean;
3333
isDBReady: boolean;
3434
pdfHighlightEnabled: boolean;
35+
pdfWordHighlightEnabled: boolean;
3536
epubHighlightEnabled: boolean;
37+
epubWordHighlightEnabled: boolean;
3638
}
3739

3840
const ConfigContext = createContext<ConfigContextType | undefined>(undefined);
@@ -103,7 +105,9 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
103105
savedVoices,
104106
smartSentenceSplitting,
105107
pdfHighlightEnabled,
108+
pdfWordHighlightEnabled,
106109
epubHighlightEnabled,
110+
epubWordHighlightEnabled,
107111
} = config || APP_CONFIG_DEFAULTS;
108112

109113
/**
@@ -201,7 +205,9 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
201205
isLoading,
202206
isDBReady,
203207
pdfHighlightEnabled,
204-
epubHighlightEnabled
208+
pdfWordHighlightEnabled,
209+
epubHighlightEnabled,
210+
epubWordHighlightEnabled
205211
}}>
206212
{children}
207213
</ConfigContext.Provider>

0 commit comments

Comments
 (0)