@@ -15,35 +15,42 @@ export const preprocessSentenceForAudio = (text: string): string => {
1515 . trim ( ) ;
1616} ;
1717
18- const MAX_BLOCK_LENGTH = 250 ; // Maximum characters per block
18+ const MAX_BLOCK_LENGTH = 300 ; // Maximum characters per block
1919
2020export const splitIntoSentences = ( text : string ) : string [ ] => {
21- // Preprocess the text before splitting into sentences
22- const cleanedText = preprocessSentenceForAudio ( text ) ;
23- const doc = nlp ( cleanedText ) ;
24- const rawSentences = doc . sentences ( ) . out ( 'array' ) as string [ ] ;
25-
21+ // Split text into paragraphs first
22+ const paragraphs = text . split ( / \n + / ) ;
2623 const blocks : string [ ] = [ ] ;
27- let currentBlock = '' ;
2824
29- for ( const sentence of rawSentences ) {
30- const trimmedSentence = sentence . trim ( ) ;
25+ for ( const paragraph of paragraphs ) {
26+ if ( ! paragraph . trim ( ) ) continue ;
27+
28+ // Preprocess each paragraph
29+ const cleanedText = preprocessSentenceForAudio ( paragraph ) ;
30+ const doc = nlp ( cleanedText ) ;
31+ const rawSentences = doc . sentences ( ) . out ( 'array' ) as string [ ] ;
3132
32- // If adding this sentence would exceed the limit, start a new block
33- if ( currentBlock && ( currentBlock . length + trimmedSentence . length + 1 ) > MAX_BLOCK_LENGTH ) {
34- blocks . push ( currentBlock . trim ( ) ) ;
35- currentBlock = trimmedSentence ;
36- } else {
37- // Add to current block with a space if not empty
38- currentBlock = currentBlock
39- ? `${ currentBlock } ${ trimmedSentence } `
40- : trimmedSentence ;
33+ let currentBlock = '' ;
34+
35+ for ( const sentence of rawSentences ) {
36+ const trimmedSentence = sentence . trim ( ) ;
37+
38+ // If adding this sentence would exceed the limit, start a new block
39+ if ( currentBlock && ( currentBlock . length + trimmedSentence . length + 1 ) > MAX_BLOCK_LENGTH ) {
40+ blocks . push ( currentBlock . trim ( ) ) ;
41+ currentBlock = trimmedSentence ;
42+ } else {
43+ // Add to current block with a space if not empty
44+ currentBlock = currentBlock
45+ ? `${ currentBlock } ${ trimmedSentence } `
46+ : trimmedSentence ;
47+ }
4148 }
42- }
4349
44- // Add the last block if not empty
45- if ( currentBlock ) {
46- blocks . push ( currentBlock . trim ( ) ) ;
50+ // Add the last block if not empty
51+ if ( currentBlock ) {
52+ blocks . push ( currentBlock . trim ( ) ) ;
53+ }
4754 }
4855
4956 return blocks ;
0 commit comments