Skip to content

Commit 071d967

Browse files
committed
feat(pdf): modernize text highlighting and parsing for fluidity
- Offload computationally intensive text matching for real-time highlighting to a dedicated Web Worker, ensuring the main thread remains responsive during playback.
- Implement a new overlay-based highlighting system that renders independently of the PDF's text layer, providing smoother and more reliable visual feedback without interfering with document rendering.
- Introduce a new setting allowing users to enable or disable real-time text highlighting in PDFs, offering personalized control over the reading interface.
- Upgrade the underlying text comparison algorithm to Dice similarity for more accurate and context-aware matching of spoken words to on-screen text, improving synchronization precision.
- Improve sentence boundary detection, especially for quoted dialogue and complex structures, by enhancing the NLP processing logic, leading to a more natural audio-text flow.
1 parent 2f39e8f commit 071d967

File tree

7 files changed

+545
-114
lines changed

7 files changed

+545
-114
lines changed

src/components/DocumentSettings.tsx

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ export function DocumentSettings({ isOpen, setIsOpen, epub, html }: {
3232
footerMargin,
3333
leftMargin,
3434
rightMargin,
35-
updateConfigKey
35+
updateConfigKey,
36+
pdfHighlightEnabled,
3637
} = useConfig();
3738
const { createFullAudioBook: createEPUBAudioBook, regenerateChapter: regenerateEPUBChapter } = useEPUB();
3839
const { createFullAudioBook: createPDFAudioBook, regenerateChapter: regeneratePDFChapter } = usePDF();
@@ -327,6 +328,22 @@ export function DocumentSettings({ isOpen, setIsOpen, epub, html }: {
327328
</p>
328329
</div>
329330
)}
331+
{!epub && !html && (
332+
<div className="space-y-1">
333+
<label className="flex items-center space-x-2">
334+
<input
335+
type="checkbox"
336+
checked={pdfHighlightEnabled}
337+
onChange={(e) => updateConfigKey('pdfHighlightEnabled', e.target.checked)}
338+
className="form-checkbox h-4 w-4 text-accent rounded border-muted"
339+
/>
340+
<span className="text-sm font-medium text-foreground">Highlight text during playback</span>
341+
</label>
342+
<p className="text-sm text-muted pl-6">
343+
Show visual highlighting in the PDF viewer while TTS is reading.
344+
</p>
345+
</div>
346+
)}
330347
{epub && (
331348
<div className="space-y-1">
332349
<label className="flex items-center space-x-2">

src/components/PDFViewer.tsx

Lines changed: 7 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -25,73 +25,25 @@ export function PDFViewer({ zoomLevel }: PDFViewerProps) {
2525
const { containerWidth } = usePDFResize(containerRef);
2626

2727
// Config context
28-
const { viewType } = useConfig();
28+
const { viewType, pdfHighlightEnabled } = useConfig();
2929

3030
// TTS context
3131
const {
3232
currentSentence,
33-
stopAndPlayFromIndex,
34-
isProcessing,
3533
skipToLocation,
3634
} = useTTS();
3735

3836
// PDF context
3937
const {
4038
highlightPattern,
4139
clearHighlights,
42-
handleTextClick,
4340
onDocumentLoadSuccess,
4441
currDocData,
4542
currDocPages,
4643
currDocText,
4744
currDocPage,
4845
} = usePDF();
4946

50-
// Add static styles once during component mount
51-
useEffect(() => {
52-
const styleElement = document.createElement('style');
53-
styleElement.textContent = `
54-
.react-pdf__Page__textContent span {
55-
cursor: pointer;
56-
transition: background-color 0.2s ease;
57-
}
58-
.react-pdf__Page__textContent span:hover {
59-
background-color: rgba(255, 255, 0, 0.2) !important;
60-
}
61-
`;
62-
document.head.appendChild(styleElement);
63-
return () => {
64-
styleElement.remove();
65-
};
66-
}, []);
67-
68-
useEffect(() => {
69-
/*
70-
* Sets up click event listeners for text selection in the PDF.
71-
* Cleans up by removing the event listener when component unmounts.
72-
*
73-
* Dependencies:
74-
* - pdfText: Re-run when the extracted text content changes
75-
* - handleTextClick: Function from context that could change
76-
* - stopAndPlayFromIndex: Function from context that could change
77-
*/
78-
const container = containerRef.current;
79-
if (!container) return;
80-
if (!currDocText) return;
81-
82-
const handleClick = (event: MouseEvent) => handleTextClick(
83-
event,
84-
currDocText,
85-
containerRef as RefObject<HTMLDivElement>,
86-
stopAndPlayFromIndex,
87-
isProcessing
88-
);
89-
container.addEventListener('click', handleClick);
90-
return () => {
91-
container.removeEventListener('click', handleClick);
92-
};
93-
}, [currDocText, handleTextClick, stopAndPlayFromIndex, isProcessing]);
94-
9547
useEffect(() => {
9648
/*
9749
* Handles highlighting the current sentence being read by TTS.
@@ -103,7 +55,11 @@ export function PDFViewer({ zoomLevel }: PDFViewerProps) {
10355
* - highlightPattern: Function from context that could change
10456
* - clearHighlights: Function from context that could change
10557
*/
106-
if (!currDocText) return;
58+
59+
if (!currDocText || !pdfHighlightEnabled) {
60+
clearHighlights();
61+
return;
62+
}
10763

10864
const highlightTimeout = setTimeout(() => {
10965
if (containerRef.current) {
@@ -115,7 +71,7 @@ export function PDFViewer({ zoomLevel }: PDFViewerProps) {
11571
clearTimeout(highlightTimeout);
11672
clearHighlights();
11773
};
118-
}, [currDocText, currentSentence, highlightPattern, clearHighlights]);
74+
}, [currDocText, currentSentence, highlightPattern, clearHighlights, pdfHighlightEnabled]);
11975

12076
// Add page dimensions state
12177
const [pageWidth, setPageWidth] = useState<number>(595); // default A4 width

src/contexts/ConfigContext.tsx

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type ConfigValues = {
3030
ttsInstructions: string;
3131
savedVoices: SavedVoices;
3232
smartSentenceSplitting: boolean;
33+
pdfHighlightEnabled: boolean;
3334
};
3435

3536
/** Interface defining the configuration context shape and functionality */
@@ -55,6 +56,7 @@ interface ConfigContextType {
5556
updateConfigKey: <K extends keyof ConfigValues>(key: K, value: ConfigValues[K]) => Promise<void>;
5657
isLoading: boolean;
5758
isDBReady: boolean;
59+
pdfHighlightEnabled: boolean;
5860
}
5961

6062
const ConfigContext = createContext<ConfigContextType | undefined>(undefined);
@@ -76,14 +78,15 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
7678
const [skipBlank, setSkipBlank] = useState<boolean>(true);
7779
const [epubTheme, setEpubTheme] = useState<boolean>(false);
7880
const [smartSentenceSplitting, setSmartSentenceSplitting] = useState<boolean>(true);
79-
const [headerMargin, setHeaderMargin] = useState<number>(0.07);
80-
const [footerMargin, setFooterMargin] = useState<number>(0.07);
81-
const [leftMargin, setLeftMargin] = useState<number>(0.07);
82-
const [rightMargin, setRightMargin] = useState<number>(0.07);
81+
const [headerMargin, setHeaderMargin] = useState<number>(0.0);
82+
const [footerMargin, setFooterMargin] = useState<number>(0.0);
83+
const [leftMargin, setLeftMargin] = useState<number>(0.0);
84+
const [rightMargin, setRightMargin] = useState<number>(0.0);
8385
const [ttsProvider, setTTSProvider] = useState<string>('custom-openai');
8486
const [ttsModel, setTTSModel] = useState<string>('kokoro');
8587
const [ttsInstructions, setTTSInstructions] = useState<string>('');
8688
const [savedVoices, setSavedVoices] = useState<SavedVoices>({});
89+
const [pdfHighlightEnabled, setPdfHighlightEnabled] = useState<boolean>(true);
8790

8891
const [isLoading, setIsLoading] = useState(true);
8992
const [isDBReady, setIsDBReady] = useState(false);
@@ -115,6 +118,7 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
115118
const cachedTTSModel = await getItem('ttsModel');
116119
const cachedTTSInstructions = await getItem('ttsInstructions');
117120
const cachedSavedVoices = await getItem('savedVoices');
121+
const cachedPdfHighlightEnabled = await getItem('pdfHighlightEnabled');
118122

119123
// Migration logic: infer provider and baseUrl for returning users
120124
let inferredProvider = cachedTTSProvider || '';
@@ -200,6 +204,7 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
200204
const finalModel = cachedTTSModel || (inferredProvider === 'openai' ? 'tts-1' : inferredProvider === 'deepinfra' ? 'hexgrad/Kokoro-82M' : 'kokoro');
201205
setTTSModel(finalModel);
202206
setTTSInstructions(cachedTTSInstructions || '');
207+
setPdfHighlightEnabled(cachedPdfHighlightEnabled === 'false' ? false : true);
203208

204209
// Restore voice for current provider-model if available in savedVoices
205210
const voiceKey = getVoiceKey(inferredProvider || 'custom-openai', finalModel);
@@ -219,8 +224,8 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
219224
if (cachedSmartSentenceSplitting === null) {
220225
await setItem('smartSentenceSplitting', 'true');
221226
}
222-
if (cachedHeaderMargin === null) await setItem('headerMargin', '0.07');
223-
if (cachedFooterMargin === null) await setItem('footerMargin', '0.07');
227+
if (cachedHeaderMargin === null) await setItem('headerMargin', '0.0');
228+
if (cachedFooterMargin === null) await setItem('footerMargin', '0.0');
224229
if (cachedLeftMargin === null) await setItem('leftMargin', '0.0');
225230
if (cachedRightMargin === null) await setItem('rightMargin', '0.0');
226231
if (cachedTTSProvider === null && inferredProvider) {
@@ -247,6 +252,10 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
247252
// Always ensure voice is not stored standalone - only in savedVoices
248253
await removeItem('voice');
249254

255+
if (cachedPdfHighlightEnabled === null) {
256+
await setItem('pdfHighlightEnabled', 'true');
257+
}
258+
250259
} catch (error) {
251260
console.error('Error initializing:', error);
252261
} finally {
@@ -379,6 +388,9 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
379388
case 'ttsInstructions':
380389
setTTSInstructions(value as string);
381390
break;
391+
case 'pdfHighlightEnabled':
392+
setPdfHighlightEnabled(value as boolean);
393+
break;
382394
}
383395
}
384396
} catch (error) {
@@ -411,7 +423,8 @@ export function ConfigProvider({ children }: { children: ReactNode }) {
411423
updateConfig,
412424
updateConfigKey,
413425
isLoading,
414-
isDBReady
426+
isDBReady,
427+
pdfHighlightEnabled
415428
}}>
416429
{children}
417430
</ConfigContext.Provider>

src/contexts/PDFContext.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ interface PDFContextType {
6262
pdfText: string,
6363
containerRef: RefObject<HTMLDivElement>,
6464
stopAndPlayFromIndex: (index: number) => void,
65-
isProcessing: boolean
65+
isProcessing: boolean,
66+
enableHighlight?: boolean
6667
) => void;
6768
createFullAudioBook: (onProgress: (progress: number) => void, signal?: AbortSignal, onChapterComplete?: (chapter: { index: number; title: string; duration?: number; status: 'pending' | 'generating' | 'completed' | 'error'; bookId?: string; format?: 'mp3' | 'm4b' }) => void, bookId?: string, format?: 'mp3' | 'm4b') => Promise<string>;
6869
regenerateChapter: (chapterIndex: number, bookId: string, format: 'mp3' | 'm4b', onProgress: (progress: number) => void, signal: AbortSignal) => Promise<{ index: number; title: string; duration?: number; status: 'pending' | 'generating' | 'completed' | 'error'; bookId?: string; format?: 'mp3' | 'm4b' }>;

src/utils/nlp.ts

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import nlp from 'compromise';
99

10-
const MAX_BLOCK_LENGTH = 300;
10+
const MAX_BLOCK_LENGTH = 450;
1111

1212
/**
1313
* Preprocesses text for audio generation by cleaning up various text artifacts
@@ -79,13 +79,8 @@ export const processTextToSentences = (text: string): string[] => {
7979
return [];
8080
}
8181

82-
if (text.length <= MAX_BLOCK_LENGTH) {
83-
// Single sentence preprocessing
84-
const cleanedText = preprocessSentenceForAudio(text);
85-
return [cleanedText];
86-
}
87-
88-
// Full text splitting into sentences
82+
// Always use the full splitting logic so we consistently respect
83+
// sentence boundaries and quoted dialogue, even for shorter texts.
8984
return splitIntoSentences(text);
9085
};
9186

@@ -160,20 +155,18 @@ const countDoubleQuotes = (s: string): number => {
160155
return matches ? matches.length : 0;
161156
};
162157

163-
const countCurlySingleQuotes = (s: string): number => {
164-
const matches = s.match(/[]/g);
165-
return matches ? matches.length : 0;
166-
};
167-
168-
const countStandaloneStraightSingles = (s: string): number => {
158+
// Replace the old curly single-quote counter and standalone-straight counter with a unified, context-aware counter
159+
const countNonApostropheSingleQuotes = (s: string): number => {
169160
let count = 0;
170161
for (let i = 0; i < s.length; i++) {
171-
if (s[i] === "'") {
162+
const ch = s[i];
163+
if (ch === "'" || ch === '‘' || ch === '’') {
172164
const prev = i > 0 ? s[i - 1] : '';
173165
const next = i + 1 < s.length ? s[i + 1] : '';
174166
const isPrevAlphaNum = /[A-Za-z0-9]/.test(prev);
175167
const isNextAlphaNum = /[A-Za-z0-9]/.test(next);
176-
// Count only when not clearly an apostrophe inside a word (e.g., don't)
168+
// Treat as a real quote mark only when it's not clearly an apostrophe
169+
// between two alphanumeric characters (e.g., don't, WizardLM’s).
177170
if (!(isPrevAlphaNum && isNextAlphaNum)) {
178171
count++;
179172
}
@@ -191,7 +184,10 @@ const mergeQuotedDialogue = (rawSentences: string[]): string[] => {
191184
for (const s of rawSentences) {
192185
const t = s.trim();
193186
const dblCount = countDoubleQuotes(t);
194-
const singleCount = countCurlySingleQuotes(t) + countStandaloneStraightSingles(t);
187+
// Use the new context-aware single-quote counter so curly apostrophes
188+
// inside words don't incorrectly toggle quote state and merge large
189+
// regions of plain prose into one block.
190+
const singleCount = countNonApostropheSingleQuotes(t);
195191

196192
if (insideDouble || insideSingle) {
197193
buffer = buffer ? `${buffer} ${t}` : t;

0 commit comments

Comments
 (0)