@@ -98,6 +98,14 @@ function buildWordDataAttributes(word: WordData): Record<string, string> {
9898 return attrs ;
9999}
100100
/**
 * Tell whether `text` consists solely of whitespace characters.
 *
 * @param text - The raw text of a non-word item.
 * @returns `true` when `text` is non-empty and every character matches `\s`;
 *   `false` for the empty string, for any text containing a paragraph
 *   marker (¶), and for anything with a non-whitespace character.
 */
function isWhitespace(text: string): boolean {
  // A paragraph marker (¶) is rendered as <br />, so it never counts as whitespace.
  if (text.includes('¶')) {
    return false;
  }
  return /^[\s]+$/.test(text);
}
108+
101109/**
102110 * Render a single word as HTML.
103111 */
@@ -106,10 +114,14 @@ export function renderWord(word: WordData, settings: RenderSettings): string {
106114
107115 if ( word . isNotWord ) {
108116 // Punctuation or whitespace
109- const hiddenClass = word . hidden ? ' hide' : '' ;
117+ const hiddenClass = word . hidden ? 'hide' : '' ;
110118 // Escape HTML first, then replace ¶ with <br /> to preserve line breaks
111119 const text = escapeHtml ( word . text ) . replace ( / ¶ / g, '<br />' ) ;
112- return `<span id="${ spanId } " class="${ hiddenClass } ">${ text } </span>` ;
120+ // Add 'punc' class for punctuation (non-whitespace non-words)
121+ // This allows CSS to control line-breaking behavior
122+ const puncClass = ! isWhitespace ( word . text ) ? 'punc' : '' ;
123+ const classes = [ hiddenClass , puncClass ] . filter ( Boolean ) . join ( ' ' ) ;
124+ return `<span id="${ spanId } " class="${ classes } ">${ text } </span>` ;
113125 }
114126
115127 // Build classes
@@ -133,17 +145,47 @@ export function renderWord(word: WordData, settings: RenderSettings): string {
133145 return `<span id="${ spanId } " class="${ classes } " ${ dataAttrString } >${ content } </span>` ;
134146}
135147
/**
 * Decide whether a word item is punctuation that belongs with the word
 * before it (so the two should not be separated by a line break).
 *
 * Only non-word items qualify. The item's trimmed text must be non-empty
 * and must begin with one of: . , ; : ! ? ] ) } ' " a closing quote or
 * closing bracket (Western or CJK forms), an ellipsis, or an em/en dash.
 *
 * @param word - The item to classify.
 * @returns `true` if the item starts with trailing punctuation, else `false`.
 */
function isTrailingPunctuation(word: WordData): boolean {
  if (word.isNotWord) {
    const trimmed = word.text.trim();
    // Empty and whitespace-only items are never punctuation.
    const hasVisibleText = trimmed !== '' && !isWhitespace(word.text);
    if (hasVisibleText) {
      // Only the first visible character decides the classification.
      return /^[.,;:!?\])}\u00BB\u201D\u2019\u203A\u300B\u3009\u3011\u3015\u3017\u3019\u301B'"\u2026\u2014\u2013]/.test(trimmed);
    }
  }
  return false;
}
160+
/**
 * Decide whether a word item is punctuation that belongs with the word
 * after it (so the two should not be separated by a line break).
 *
 * Only non-word items qualify. The item's trimmed text must be non-empty
 * and must begin with one of: ( [ { or an opening quote or opening bracket
 * (Western or CJK forms).
 *
 * @param word - The item to classify.
 * @returns `true` if the item starts with leading punctuation, else `false`.
 */
function isLeadingPunctuation(word: WordData): boolean {
  if (word.isNotWord) {
    const trimmed = word.text.trim();
    // Empty and whitespace-only items are never punctuation.
    const hasVisibleText = trimmed !== '' && !isWhitespace(word.text);
    if (hasVisibleText) {
      // Only the first visible character decides the classification.
      return /^[(\[{\u00AB\u201C\u2018\u2039\u300A\u3008\u3010\u3014\u3016\u3018\u301A]/.test(trimmed);
    }
  }
  return false;
}
173+
136174/**
137175 * Render all words as HTML, grouped by sentences.
176+ * Words and adjacent punctuation are wrapped together to prevent line breaks.
138177 */
139178export function renderText ( words : WordData [ ] , settings : RenderSettings ) : string {
140179 if ( words . length === 0 ) return '' ;
141180
142181 const parts : string [ ] = [ ] ;
143182 let currentSentenceId = - 1 ;
144183 let sentenceOpen = false ;
184+ let i = 0 ;
185+
186+ while ( i < words . length ) {
187+ const word = words [ i ] ;
145188
146- for ( const word of words ) {
147189 // Handle sentence boundaries
148190 if ( word . sentenceId !== currentSentenceId ) {
149191 if ( sentenceOpen ) {
@@ -154,8 +196,59 @@ export function renderText(words: WordData[], settings: RenderSettings): string
154196 sentenceOpen = true ;
155197 }
156198
157- // Render the word
158- parts . push ( renderWord ( word , settings ) ) ;
199+ // Check if this is a word (not punctuation/whitespace)
200+ if ( ! word . isNotWord ) {
201+ // Collect leading punctuation (already rendered), the word, and trailing punctuation
202+ const group : string [ ] = [ ] ;
203+
204+ // Check for leading punctuation that was already added
205+ // (We handle this by looking ahead from leading punctuation instead)
206+
207+ // Add the word
208+ group . push ( renderWord ( word , settings ) ) ;
209+ i ++ ;
210+
211+ // Collect trailing punctuation
212+ while ( i < words . length && words [ i ] . sentenceId === currentSentenceId && isTrailingPunctuation ( words [ i ] ) ) {
213+ group . push ( renderWord ( words [ i ] , settings ) ) ;
214+ i ++ ;
215+ }
216+
217+ // Wrap in a non-breaking group if we have trailing punctuation
218+ if ( group . length > 1 ) {
219+ parts . push ( `<span class="word-group">${ group . join ( '' ) } </span>` ) ;
220+ } else {
221+ parts . push ( group [ 0 ] ) ;
222+ }
223+ } else if ( isLeadingPunctuation ( word ) ) {
224+ // Leading punctuation - collect it with the following word
225+ const group : string [ ] = [ ] ;
226+ group . push ( renderWord ( word , settings ) ) ;
227+ i ++ ;
228+
229+ // Get the following word if it exists and is in the same sentence
230+ if ( i < words . length && ! words [ i ] . isNotWord && words [ i ] . sentenceId === currentSentenceId ) {
231+ group . push ( renderWord ( words [ i ] , settings ) ) ;
232+ i ++ ;
233+
234+ // Also collect any trailing punctuation after the word
235+ while ( i < words . length && words [ i ] . sentenceId === currentSentenceId && isTrailingPunctuation ( words [ i ] ) ) {
236+ group . push ( renderWord ( words [ i ] , settings ) ) ;
237+ i ++ ;
238+ }
239+ }
240+
241+ // Wrap in a non-breaking group
242+ if ( group . length > 1 ) {
243+ parts . push ( `<span class="word-group">${ group . join ( '' ) } </span>` ) ;
244+ } else {
245+ parts . push ( group [ 0 ] ) ;
246+ }
247+ } else {
248+ // Regular non-word (whitespace or other punctuation)
249+ parts . push ( renderWord ( word , settings ) ) ;
250+ i ++ ;
251+ }
159252 }
160253
161254 // Close last sentence
0 commit comments