Skip to content

Commit 2973965

Browse files
committed
Better nlp + refactors
1 parent 4320730 commit 2973965

File tree

5 files changed

+44
-37
lines changed

5 files changed

+44
-37
lines changed

src/components/EPUBViewer.tsx

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,10 @@ export function EPUBViewer({ className = '' }: EPUBViewerProps) {
2727
const rendition = useRef<Rendition | undefined>(undefined);
2828
const toc = useRef<NavItem[]>([]);
2929
const locationRef = useRef<string | number>(currDocPage);
30+
3031

3132
const handleLocationChanged = useCallback((location: string | number, initial = false) => {
32-
// Handle special 'next' and 'prev' cases
33+
// Handle special 'next' and 'prev' cases, which navigate via the rendition directly
3334
if (location === 'next' && rendition.current) {
3435
rendition.current.next();
3536
return;

src/contexts/EPUBContext.tsx

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -84,9 +84,7 @@ export function EPUBProvider({ children }: { children: ReactNode }) {
8484
* Extracts text content from the current EPUB page/location
8585
*/
8686
const extractPageText = useCallback(async (book: Book, rendition: Rendition): Promise<string> => {
87-
try {
88-
console.log('Extracting EPUB text from current location');
89-
87+
try {
9088
const { start, end } = rendition.location;
9189
if (!start?.cfi || !end?.cfi) return '';
9290

src/contexts/TTSContext.tsx

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ export function TTSProvider({ children }: { children: React.ReactNode }) {
102102

103103
// Use custom hooks
104104
const audioContext = useAudioContext();
105-
const audioCache = useAudioCache(50);
105+
const audioCache = useAudioCache(25);
106106
const { availableVoices, fetchVoices } = useVoiceManagement(openApiKey, openApiBaseUrl);
107107

108108
// Add ref for location change handler
@@ -120,22 +120,21 @@ export function TTSProvider({ children }: { children: React.ReactNode }) {
120120
* State Management
121121
*/
122122
const [isPlaying, setIsPlaying] = useState(false);
123+
const [isEPUB, setIsEPUB] = useState(false);
124+
const [isProcessing, setIsProcessing] = useState(false);
125+
126+
const [currDocPage, setCurrDocPage] = useState<string | number>(1);
127+
const currDocPageNumber = (!isEPUB ? parseInt(currDocPage.toString()) : 1); // PDF uses numbers only
128+
const [currDocPages, setCurrDocPages] = useState<number>();
129+
123130
const [sentences, setSentences] = useState<string[]>([]);
124131
const [currentIndex, setCurrentIndex] = useState(0);
125132
const [activeHowl, setActiveHowl] = useState<Howl | null>(null);
126-
const [isProcessing, setIsProcessing] = useState(false);
127133
const [speed, setSpeed] = useState(voiceSpeed);
128134
const [voice, setVoice] = useState(configVoice);
129-
const [currDocPage, setCurrDocPage] = useState<string | number>(1);
130-
const [currDocPages, setCurrDocPages] = useState<number>();
131135
const [nextPageLoading, setNextPageLoading] = useState(false);
132136

133-
// Add this state to track if we're in EPUB mode
134-
const [isEPUB, setIsEPUB] = useState(false);
135-
136-
const currDocPageNumber = (!isEPUB ? parseInt(currDocPage.toString()) : 1);
137-
138-
console.log('page:', currDocPage, 'pages:', currDocPages);
137+
//console.log('page:', currDocPage, 'pages:', currDocPages);
139138

140139
/**
141140
* Changes the current page by a specified amount

src/utils/nlp.ts

Lines changed: 29 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -15,35 +15,42 @@ export const preprocessSentenceForAudio = (text: string): string => {
1515
.trim();
1616
};
1717

18-
const MAX_BLOCK_LENGTH = 250; // Maximum characters per block
18+
const MAX_BLOCK_LENGTH = 300; // Maximum characters per block
1919

2020
export const splitIntoSentences = (text: string): string[] => {
21-
// Preprocess the text before splitting into sentences
22-
const cleanedText = preprocessSentenceForAudio(text);
23-
const doc = nlp(cleanedText);
24-
const rawSentences = doc.sentences().out('array') as string[];
25-
21+
// Split text into paragraphs first
22+
const paragraphs = text.split(/\n+/);
2623
const blocks: string[] = [];
27-
let currentBlock = '';
2824

29-
for (const sentence of rawSentences) {
30-
const trimmedSentence = sentence.trim();
25+
for (const paragraph of paragraphs) {
26+
if (!paragraph.trim()) continue;
27+
28+
// Preprocess each paragraph
29+
const cleanedText = preprocessSentenceForAudio(paragraph);
30+
const doc = nlp(cleanedText);
31+
const rawSentences = doc.sentences().out('array') as string[];
3132

32-
// If adding this sentence would exceed the limit, start a new block
33-
if (currentBlock && (currentBlock.length + trimmedSentence.length + 1) > MAX_BLOCK_LENGTH) {
34-
blocks.push(currentBlock.trim());
35-
currentBlock = trimmedSentence;
36-
} else {
37-
// Add to current block with a space if not empty
38-
currentBlock = currentBlock
39-
? `${currentBlock} ${trimmedSentence}`
40-
: trimmedSentence;
33+
let currentBlock = '';
34+
35+
for (const sentence of rawSentences) {
36+
const trimmedSentence = sentence.trim();
37+
38+
// If adding this sentence would exceed the limit, start a new block
39+
if (currentBlock && (currentBlock.length + trimmedSentence.length + 1) > MAX_BLOCK_LENGTH) {
40+
blocks.push(currentBlock.trim());
41+
currentBlock = trimmedSentence;
42+
} else {
43+
// Add to current block with a space if not empty
44+
currentBlock = currentBlock
45+
? `${currentBlock} ${trimmedSentence}`
46+
: trimmedSentence;
47+
}
4148
}
42-
}
4349

44-
// Add the last block if not empty
45-
if (currentBlock) {
46-
blocks.push(currentBlock.trim());
50+
// Add the last block if not empty
51+
if (currentBlock) {
52+
blocks.push(currentBlock.trim());
53+
}
4754
}
4855

4956
return blocks;

src/utils/pdf.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,10 +116,12 @@ export function findBestTextMatch(
116116
lengthDiff: Infinity,
117117
};
118118

119+
const SPAN_SEARCH_LIMIT = 10;
120+
119121
for (let i = 0; i < elements.length; i++) {
120122
let combinedText = '';
121123
const currentElements = [];
122-
for (let j = i; j < Math.min(i + 10, elements.length); j++) {
124+
for (let j = i; j < Math.min(i + SPAN_SEARCH_LIMIT, elements.length); j++) {
123125
const node = elements[j];
124126
const newText = combinedText ? `${combinedText} ${node.text}` : node.text;
125127
if (newText.length > maxCombinedLength) break;

0 commit comments

Comments (0)