Skip to content

Commit d7bea48

Browse files
author
HugoFara
committed
fix(text reading): Incorrect text formatting with punctuation signs
Fixes #125
1 parent b3b3869 commit d7bea48

File tree

5 files changed

+137
-8
lines changed

5 files changed

+137
-8
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ ones are marked like "v1.0.0-fork".
9595
* **SQL Prefix Queries**: Fixed `prefixQuery()` in database migrations:
9696
* Now handles `DROP TABLE IF EXISTS` syntax (previously only `IF NOT EXISTS`).
9797
* Now case-insensitive for SQL keywords (CREATE, DROP, ALTER, INSERT).
98+
* **Punctuation Formatting** ([#125](https://github.com/HugoFara/lwt/issues/125)):
99+
Fixed punctuation marks (periods, commas, quotation marks) breaking away from
100+
adjacent words onto separate lines. Punctuation now stays "stuck" to the word
101+
it belongs to by wrapping word+punctuation pairs in non-breaking groups.
98102

99103
### Security
100104

assets/css/styles.css

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/backend/Services/TextReadingService.php

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,12 @@ public function parseItem(
216216

217217
if ($record['TiIsNotWord'] != 0) {
218218
// The current item is not a term (likely punctuation)
219-
echo "<span id=\"$spanid\" class=\"$hidetag\">" .
220-
str_replace("¶", '<br />', htmlspecialchars($record['TiText'] ?? '', ENT_QUOTES, 'UTF-8')) . '</span>';
219+
$text = $record['TiText'] ?? '';
220+
// Add 'punc' class for punctuation (non-whitespace non-words)
221+
$puncClass = (trim($text) !== '' && !ctype_space($text)) ? 'punc' : '';
222+
$classes = trim($hidetag . ' ' . $puncClass);
223+
echo "<span id=\"$spanid\" class=\"$classes\">" .
224+
str_replace("¶", '<br />', htmlspecialchars($text, ENT_QUOTES, 'UTF-8')) . '</span>';
221225
} else {
222226
// A term (word or multi-word)
223227
$this->echoTerm(

src/frontend/css/base/styles.css

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,34 @@ span.wsty, span.mwsty
445445
color: #000000;
446446
}
447447

448+
/* Punctuation marks - stick to adjacent words */
449+
.punc
450+
{
451+
/* Negative margin closes the gap created by word's margin-right */
452+
margin-left: -2px;
453+
}
454+
455+
/* Word groups - prevent line breaks between word and punctuation */
456+
.word-group
457+
{
458+
/* Prevent line breaks within the group */
459+
white-space: nowrap;
460+
display: inline;
461+
}
462+
463+
/* Inside word groups, punctuation keeps the same -2px offset as .punc */
464+
.word-group .punc
465+
{
466+
margin-left: -2px;
467+
}
468+
469+
/* Remove margin from last element in word group */
470+
.word-group > span:last-child.wsty,
471+
.word-group > span:last-child.mwsty
472+
{
473+
margin-right: 0;
474+
}
475+
448476
/* Multi-words hint */
449477
span.mwsty
450478
{

src/frontend/js/reading/text_renderer.ts

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ function buildWordDataAttributes(word: WordData): Record<string, string> {
9898
return attrs;
9999
}
100100

101+
/**
 * Tell whether `text` is made up solely of whitespace characters.
 *
 * An empty string is not whitespace, and neither is any text holding a
 * paragraph marker (¶): the marker is rendered as a `<br />` line break.
 *
 * @param text - Raw text of a non-word item.
 * @returns `true` when `text` is one or more whitespace characters only.
 */
function isWhitespace(text: string): boolean {
  // A paragraph marker disqualifies the text outright.
  if (text.includes('¶')) {
    return false;
  }
  // At least one character, and every character is whitespace.
  return /^[\s]+$/.test(text);
}
108+
101109
/**
102110
* Render a single word as HTML.
103111
*/
@@ -106,10 +114,14 @@ export function renderWord(word: WordData, settings: RenderSettings): string {
106114

107115
if (word.isNotWord) {
108116
// Punctuation or whitespace
109-
const hiddenClass = word.hidden ? ' hide' : '';
117+
const hiddenClass = word.hidden ? 'hide' : '';
110118
// Escape HTML first, then replace ¶ with <br /> to preserve line breaks
111119
const text = escapeHtml(word.text).replace(/¶/g, '<br />');
112-
return `<span id="${spanId}" class="${hiddenClass}">${text}</span>`;
120+
// Add 'punc' class for punctuation (non-whitespace non-words)
121+
// This allows CSS to control line-breaking behavior
122+
const puncClass = !isWhitespace(word.text) ? 'punc' : '';
123+
const classes = [hiddenClass, puncClass].filter(Boolean).join(' ');
124+
return `<span id="${spanId}" class="${classes}">${text}</span>`;
113125
}
114126

115127
// Build classes
@@ -133,17 +145,47 @@ export function renderWord(word: WordData, settings: RenderSettings): string {
133145
return `<span id="${spanId}" class="${classes}" ${dataAttrString}>${content}</span>`;
134146
}
135147

148+
/**
 * Matches text that starts with a trailing (closing) punctuation mark:
 * ASCII marks, typographic closers, dashes and ellipsis, CJK closing brackets
 * (including corner brackets 」 』), ideographic comma/full stop, and the
 * fullwidth forms of the ASCII marks.
 *
 * Hoisted to module scope so the pattern is compiled once rather than on
 * every call.
 */
const TRAILING_PUNCTUATION_START =
  /^[.,;:!?\])}\u00BB\u201D\u2019\u203A\u300B\u3009\u3011\u3015\u3017\u3019\u301B'"\u2026\u2014\u2013\u300D\u300F\u3001\u3002\uFF01\uFF09\uFF0C\uFF0E\uFF1A\uFF1B\uFF1F\uFF3D\uFF5D]/;

/**
 * Check if a word item is trailing punctuation (should stick to preceding word).
 * Trailing punctuation includes: . , ; : ! ? ) ] } » ” ’ … and their CJK and
 * fullwidth counterparts such as 。 、 」 』 ） ！ ？.
 *
 * @param word - Item to classify; only `isNotWord` and `text` are read.
 * @returns `true` when the item is a non-word whose trimmed text starts with
 *          a trailing punctuation mark.
 */
function isTrailingPunctuation(word: WordData): boolean {
  if (!word.isNotWord) return false;
  // trim() strips the same characters that \s matches, so empty and
  // whitespace-only items both collapse to '' and are rejected here.
  const text = word.text.trim();
  if (!text) return false;
  return TRAILING_PUNCTUATION_START.test(text);
}
160+
161+
/**
 * Matches text that starts with a leading (opening) punctuation mark:
 * ASCII brackets, typographic opening quotes, CJK opening brackets
 * (including corner brackets 「 『), fullwidth opening brackets, and the
 * inverted marks ¡ ¿.
 *
 * Hoisted to module scope so the pattern is compiled once rather than on
 * every call.
 */
const LEADING_PUNCTUATION_START =
  /^[(\[{\u00AB\u201C\u2018\u2039\u300A\u3008\u3010\u3014\u3016\u3018\u301A\u300C\u300E\uFF08\uFF3B\uFF5B\u00A1\u00BF]/;

/**
 * Check if a word item is leading punctuation (should stick to following word).
 * Leading punctuation includes: ( [ { « “ ‘ ¡ ¿ and their CJK and fullwidth
 * counterparts such as 「 『 （.
 *
 * @param word - Item to classify; only `isNotWord` and `text` are read.
 * @returns `true` when the item is a non-word whose trimmed text starts with
 *          a leading punctuation mark.
 */
function isLeadingPunctuation(word: WordData): boolean {
  if (!word.isNotWord) return false;
  // trim() strips the same characters that \s matches, so empty and
  // whitespace-only items both collapse to '' and are rejected here.
  const text = word.text.trim();
  if (!text) return false;
  return LEADING_PUNCTUATION_START.test(text);
}
173+
136174
/**
137175
* Render all words as HTML, grouped by sentences.
176+
* Words and adjacent punctuation are wrapped together to prevent line breaks.
138177
*/
139178
export function renderText(words: WordData[], settings: RenderSettings): string {
140179
if (words.length === 0) return '';
141180

142181
const parts: string[] = [];
143182
let currentSentenceId = -1;
144183
let sentenceOpen = false;
184+
let i = 0;
185+
186+
while (i < words.length) {
187+
const word = words[i];
145188

146-
for (const word of words) {
147189
// Handle sentence boundaries
148190
if (word.sentenceId !== currentSentenceId) {
149191
if (sentenceOpen) {
@@ -154,8 +196,59 @@ export function renderText(words: WordData[], settings: RenderSettings): string
154196
sentenceOpen = true;
155197
}
156198

157-
// Render the word
158-
parts.push(renderWord(word, settings));
199+
// Check if this is a word (not punctuation/whitespace)
200+
if (!word.isNotWord) {
201+
// Collect leading punctuation (already rendered), the word, and trailing punctuation
202+
const group: string[] = [];
203+
204+
// Check for leading punctuation that was already added
205+
// (We handle this by looking ahead from leading punctuation instead)
206+
207+
// Add the word
208+
group.push(renderWord(word, settings));
209+
i++;
210+
211+
// Collect trailing punctuation
212+
while (i < words.length && words[i].sentenceId === currentSentenceId && isTrailingPunctuation(words[i])) {
213+
group.push(renderWord(words[i], settings));
214+
i++;
215+
}
216+
217+
// Wrap in a non-breaking group if we have trailing punctuation
218+
if (group.length > 1) {
219+
parts.push(`<span class="word-group">${group.join('')}</span>`);
220+
} else {
221+
parts.push(group[0]);
222+
}
223+
} else if (isLeadingPunctuation(word)) {
224+
// Leading punctuation - collect it with the following word
225+
const group: string[] = [];
226+
group.push(renderWord(word, settings));
227+
i++;
228+
229+
// Get the following word if it exists and is in the same sentence
230+
if (i < words.length && !words[i].isNotWord && words[i].sentenceId === currentSentenceId) {
231+
group.push(renderWord(words[i], settings));
232+
i++;
233+
234+
// Also collect any trailing punctuation after the word
235+
while (i < words.length && words[i].sentenceId === currentSentenceId && isTrailingPunctuation(words[i])) {
236+
group.push(renderWord(words[i], settings));
237+
i++;
238+
}
239+
}
240+
241+
// Wrap in a non-breaking group
242+
if (group.length > 1) {
243+
parts.push(`<span class="word-group">${group.join('')}</span>`);
244+
} else {
245+
parts.push(group[0]);
246+
}
247+
} else {
248+
// Regular non-word (whitespace or other punctuation)
249+
parts.push(renderWord(word, settings));
250+
i++;
251+
}
159252
}
160253

161254
// Close last sentence

0 commit comments

Comments
 (0)