Skip to content

Commit 54e9604

Browse files
EladDv authored and jplexer committed
applib/graphics: Add support for mixed RTL-LTR text with correct justifying and switching
Signed-off-by: Elad Dvash <e.d.dvash@gmail.com>
1 parent 9d34cae commit 54e9604

File tree

1 file changed

+215
-62
lines changed

1 file changed

+215
-62
lines changed

src/fw/applib/graphics/text_layout.c

Lines changed: 215 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,48 @@
4343

4444
static bool prv_char_iter_next_start_of_word(Iterator* char_iter);
4545

46+
//! Check if a codepoint is punctuation (should be ignored for RTL detection)
47+
static bool prv_codepoint_is_punctuation(Codepoint cp) {
48+
// ASCII punctuation
49+
if ((cp >= 0x21 && cp <= 0x2F) || // ! " # $ % & ' ( ) * + , - . /
50+
(cp >= 0x3A && cp <= 0x40) || // : ; < = > ? @
51+
(cp >= 0x5B && cp <= 0x60) || // [ \ ] ^ _ `
52+
(cp >= 0x7B && cp <= 0x7E)) { // { | } ~
53+
return true;
54+
}
55+
// General punctuation block (U+2000-U+206F) - includes dashes, quotes, etc.
56+
if (cp >= 0x2000 && cp <= 0x206F) {
57+
return true;
58+
}
59+
return false;
60+
}
61+
62+
//! Check if text starts with an RTL (right-to-left) character
63+
//! Skips leading whitespace, newlines, and punctuation to find the first letter
64+
static bool prv_utf8_starts_with_rtl(const utf8_t *start, const utf8_t *end) {
65+
if (start == NULL || end == NULL || start >= end) {
66+
return false;
67+
}
68+
69+
utf8_t *ptr = (utf8_t *)start;
70+
while (ptr < end && *ptr != '\0') {
71+
utf8_t *next = NULL;
72+
Codepoint cp = utf8_peek_codepoint(ptr, &next);
73+
if (cp == 0 || next == NULL) {
74+
break;
75+
}
76+
// Skip whitespace, newlines, and punctuation
77+
if (cp == SPACE_CODEPOINT || cp == NEWLINE_CODEPOINT ||
78+
codepoint_is_zero_width(cp) || prv_codepoint_is_punctuation(cp)) {
79+
ptr = next;
80+
continue;
81+
}
82+
// Found first letter character, check if RTL
83+
return codepoint_is_rtl(cp);
84+
}
85+
return false;
86+
}
87+
4688
// PBL-23045 Eventually remove perimeter debugging
4789
void graphics_text_perimeter_debugging_enable(bool enable) {
4890
app_state_set_text_perimeter_debugging_enabled(enable);
@@ -498,95 +540,206 @@ utf8_t* walk_line(GContext* ctx, Line* line, const TextBoxParams* const text_box
498540
return NULL;
499541
}
500542

501-
// RTL support: check if this line contains RTL characters and reverse if rendering
502-
// Arabic text also needs shaping (connecting letters) BEFORE reversal
503-
// Use smaller buffers to reduce stack usage (max 16 codepoints * 4 bytes = 64)
504-
utf8_t shaped_buffer[64]; // Buffer for Arabic shaping
505-
utf8_t rtl_buffer[64]; // Buffer for RTL reversal
506-
const utf8_t *render_start = line->start;
507-
bool is_rtl = false;
543+
// RTL support: segment-based rendering for mixed RTL/LTR text
544+
// Each RTL segment is reversed individually, LTR segments render normally
508545
bool is_rendering = (char_visitor_cb == render_chars_char_visitor_cb);
509546

547+
// For segment-based RTL rendering during render pass
510548
if (is_rendering && line->start != NULL && text_box_params->utf8_bounds != NULL &&
511549
text_box_params->utf8_bounds->end != NULL &&
512-
text_box_params->utf8_bounds->end > line->start) {
513-
// Find line end by calculating byte length from width
514-
// We need to find where the line actually ends
515-
utf8_t *line_end = (utf8_t *)text_box_params->utf8_bounds->end;
550+
text_box_params->utf8_bounds->end > line->start &&
551+
utf8_contains_rtl(line->start, text_box_params->utf8_bounds->end)) {
516552

517-
// Check if this line contains RTL text
518-
if (utf8_contains_rtl(line->start, line_end)) {
519-
// Calculate the actual line length to reverse
520-
// Use the line width to determine how much text fits
521-
size_t src_len = 0;
522-
utf8_t *ptr = (utf8_t *)line->start;
523-
int width_so_far = 0;
524-
525-
while (ptr < line_end && *ptr != '\0' && *ptr != '\n') {
526-
utf8_t *next = NULL;
527-
Codepoint cp = utf8_peek_codepoint(ptr, &next);
528-
if (cp == 0 || next == NULL) {
553+
// Render using segment-based approach
554+
utf8_t *ptr = (utf8_t *)line->start;
555+
utf8_t *line_end = (utf8_t *)text_box_params->utf8_bounds->end;
556+
int walked_width_px = 0;
557+
utf8_t* last_visited_char = NULL;
558+
559+
while (ptr < line_end && *ptr != '\0' && *ptr != '\n' &&
560+
walked_width_px + suffix_width_px <= available_horiz_px) {
561+
562+
// Find segment start and determine if RTL
563+
utf8_t *segment_start = ptr;
564+
utf8_t *next = NULL;
565+
Codepoint first_cp = utf8_peek_codepoint(ptr, &next);
566+
if (first_cp == 0 || next == NULL) break;
567+
568+
// Skip leading punctuation/spaces to determine segment type
569+
bool segment_is_rtl = false;
570+
utf8_t *check_ptr = ptr;
571+
while (check_ptr < line_end && *check_ptr != '\0' && *check_ptr != '\n') {
572+
utf8_t *check_next = NULL;
573+
Codepoint check_cp = utf8_peek_codepoint(check_ptr, &check_next);
574+
if (check_cp == 0 || check_next == NULL) break;
575+
if (!prv_codepoint_is_punctuation(check_cp) &&
576+
check_cp != SPACE_CODEPOINT && !codepoint_is_zero_width(check_cp)) {
577+
segment_is_rtl = codepoint_is_rtl(check_cp);
529578
break;
530579
}
580+
check_ptr = check_next;
581+
}
582+
583+
// Collect segment (until we hit opposite script type or end)
584+
utf8_t *segment_end = ptr;
585+
utf8_t *segment_last_char = ptr; // Track start of last character in segment
586+
int segment_width_px = 0; // Track width within this segment
587+
while (segment_end < line_end && *segment_end != '\0' && *segment_end != '\n') {
588+
utf8_t *seg_next = NULL;
589+
Codepoint seg_cp = utf8_peek_codepoint(segment_end, &seg_next);
590+
if (seg_cp == 0 || seg_next == NULL) break;
591+
592+
// Check if this character changes the segment type
593+
if (!prv_codepoint_is_punctuation(seg_cp) &&
594+
seg_cp != SPACE_CODEPOINT && !codepoint_is_zero_width(seg_cp)) {
595+
bool char_is_rtl = codepoint_is_rtl(seg_cp);
596+
if (char_is_rtl != segment_is_rtl) {
597+
break; // End of segment
598+
}
599+
}
600+
601+
// For RTL segments: don't include trailing spaces before LTR text
602+
// This prevents the space from being reversed to the wrong position
603+
if (segment_is_rtl && seg_cp == SPACE_CODEPOINT) {
604+
// Look ahead to see if next non-space char is LTR
605+
utf8_t *look_ptr = seg_next;
606+
while (look_ptr < line_end && *look_ptr != '\0' && *look_ptr != '\n') {
607+
utf8_t *look_next = NULL;
608+
Codepoint look_cp = utf8_peek_codepoint(look_ptr, &look_next);
609+
if (look_cp == 0 || look_next == NULL) break;
610+
if (look_cp != SPACE_CODEPOINT && !codepoint_is_zero_width(look_cp) &&
611+
!prv_codepoint_is_punctuation(look_cp)) {
612+
// Found a letter - if it's LTR, end segment before the space
613+
if (!codepoint_is_rtl(look_cp)) {
614+
goto end_segment; // Break out of collection loop
615+
}
616+
break;
617+
}
618+
look_ptr = look_next;
619+
}
620+
}
621+
622+
// Check width constraint (must account for walked width + accumulated segment width)
531623
int glyph_width = prv_codepoint_get_horizontal_advance(&ctx->font_cache,
532-
text_box_params->font, cp);
533-
if (width_so_far + glyph_width > available_horiz_px) {
624+
text_box_params->font, seg_cp);
625+
if (walked_width_px + segment_width_px + glyph_width + suffix_width_px > available_horiz_px) {
534626
break;
535627
}
536-
width_so_far += glyph_width;
537-
ptr = next;
538-
}
539-
src_len = ptr - line->start;
540628

541-
// Limit src_len to fit in our buffer (with room for UTF-8 expansion)
542-
if (src_len > sizeof(rtl_buffer) - 4) {
543-
src_len = sizeof(rtl_buffer) - 4;
629+
segment_width_px += glyph_width;
630+
segment_last_char = segment_end; // Track this as the last character before advancing
631+
segment_end = seg_next;
544632
}
633+
end_segment:
634+
635+
size_t segment_len = segment_end - segment_start;
636+
if (segment_len == 0) break;
637+
638+
// Render the segment
639+
if (segment_is_rtl) {
640+
// RTL segment: shape Arabic if needed, then reverse and render
641+
utf8_t shaped_buffer[64];
642+
utf8_t rtl_buffer[64];
643+
const utf8_t *to_render = segment_start;
644+
size_t render_len = segment_len;
645+
646+
// Limit to buffer size
647+
if (render_len > sizeof(rtl_buffer) - 4) {
648+
render_len = sizeof(rtl_buffer) - 4;
649+
}
545650

546-
if (src_len > 0) {
547-
// Step 1: Shape Arabic text first (connect letters based on context)
548-
// This must happen BEFORE RTL reversal to analyze letter positions correctly
549-
const utf8_t *to_reverse = line->start;
550-
size_t to_reverse_len = src_len;
551-
552-
if (utf8_contains_arabic(line->start, ptr)) {
553-
size_t shaped_len = arabic_shape_text(line->start, src_len,
651+
// Shape Arabic text first
652+
if (utf8_contains_arabic(segment_start, segment_end)) {
653+
size_t shaped_len = arabic_shape_text(segment_start, render_len,
554654
shaped_buffer, sizeof(shaped_buffer) - 1);
555655
if (shaped_len > 0) {
556656
shaped_buffer[shaped_len] = '\0';
557-
to_reverse = shaped_buffer;
558-
to_reverse_len = shaped_len;
657+
to_render = shaped_buffer;
658+
render_len = shaped_len;
559659
}
560660
}
561661

562-
// Step 2: Then reverse for RTL display
563-
size_t reversed_len = utf8_reverse_for_rtl(to_reverse, to_reverse_len,
662+
// Reverse for RTL display
663+
size_t reversed_len = utf8_reverse_for_rtl(to_render, render_len,
564664
rtl_buffer, sizeof(rtl_buffer) - 1);
565665
if (reversed_len > 0) {
566666
rtl_buffer[reversed_len] = '\0';
567-
render_start = rtl_buffer;
568-
is_rtl = true;
667+
668+
// Render reversed segment
669+
utf8_t *rptr = rtl_buffer;
670+
while (*rptr != '\0') {
671+
utf8_t *rnext = NULL;
672+
Codepoint rcp = utf8_peek_codepoint(rptr, &rnext);
673+
if (rcp == 0 || rnext == NULL) break;
674+
675+
int glyph_width = prv_codepoint_get_horizontal_advance(&ctx->font_cache,
676+
text_box_params->font, rcp);
677+
678+
GRect cursor = {
679+
.origin = line->origin,
680+
.size.w = glyph_width,
681+
.size.h = fonts_get_font_height(text_box_params->font)
682+
};
683+
cursor.origin.x += walked_width_px;
684+
685+
if (!codepoint_is_zero_width(rcp)) {
686+
render_glyph(ctx, rcp, text_box_params->font, cursor);
687+
}
688+
689+
walked_width_px += glyph_width;
690+
rptr = rnext;
691+
}
692+
}
693+
} else {
694+
// LTR segment: render normally
695+
utf8_t *sptr = segment_start;
696+
while (sptr < segment_end) {
697+
utf8_t *snext = NULL;
698+
Codepoint scp = utf8_peek_codepoint(sptr, &snext);
699+
if (scp == 0 || snext == NULL) break;
700+
701+
int glyph_width = prv_codepoint_get_horizontal_advance(&ctx->font_cache,
702+
text_box_params->font, scp);
703+
704+
GRect cursor = {
705+
.origin = line->origin,
706+
.size.w = glyph_width,
707+
.size.h = fonts_get_font_height(text_box_params->font)
708+
};
709+
cursor.origin.x += walked_width_px;
710+
711+
if (!codepoint_is_zero_width(scp)) {
712+
render_glyph(ctx, scp, text_box_params->font, cursor);
713+
}
714+
715+
walked_width_px += glyph_width;
716+
last_visited_char = sptr;
717+
sptr = snext;
569718
}
570719
}
720+
721+
ptr = segment_end;
722+
last_visited_char = segment_last_char;
571723
}
724+
725+
// Handle suffix if present
726+
if (line->suffix_codepoint) {
727+
GRect cursor = {
728+
.origin = line->origin,
729+
.size.w = suffix_width_px,
730+
.size.h = fonts_get_font_height(text_box_params->font)
731+
};
732+
cursor.origin.x += walked_width_px;
733+
render_glyph(ctx, line->suffix_codepoint, text_box_params->font, cursor);
734+
}
735+
736+
return last_visited_char;
572737
}
573738

574-
// Set up iterator - use reversed buffer for RTL rendering
739+
// Standard rendering path (no RTL or not rendering)
575740
Iterator char_iter;
576741
CharIterState char_iter_state;
577-
TextBoxParams rtl_text_box_params;
578-
Utf8Bounds rtl_bounds;
579-
580-
if (is_rtl) {
581-
// Create temporary bounds and params for the reversed text
582-
rtl_bounds.start = (utf8_t *)render_start;
583-
rtl_bounds.end = (utf8_t *)render_start + strlen((const char *)render_start);
584-
rtl_text_box_params = *text_box_params;
585-
rtl_text_box_params.utf8_bounds = &rtl_bounds;
586-
char_iter_init(&char_iter, &char_iter_state, &rtl_text_box_params, (utf8_t *)render_start);
587-
} else {
588-
char_iter_init(&char_iter, &char_iter_state, text_box_params, line->start);
589-
}
742+
char_iter_init(&char_iter, &char_iter_state, text_box_params, line->start);
590743
Utf8IterState* utf8_iter_state = (Utf8IterState*) &char_iter_state.utf8_iter_state;
591744

592745
bool is_newline_as_space = text_box_params->overflow_mode == GTextOverflowModeFill;
@@ -1008,9 +1161,9 @@ static void prv_line_justify(Line* line, const TextBoxParams* const text_box_par
10081161
// Determine effective alignment - RTL text defaults to right alignment
10091162
GTextAlignment effective_alignment = text_box_params->alignment;
10101163

1011-
// If alignment is left (default) and text contains RTL, switch to right
1164+
// If alignment is left (default) and text starts with RTL, switch to right
10121165
if (effective_alignment == GTextAlignmentLeft && line->start != NULL) {
1013-
if (utf8_contains_rtl(line->start, text_box_params->utf8_bounds->end)) {
1166+
if (prv_utf8_starts_with_rtl(line->start, text_box_params->utf8_bounds->end)) {
10141167
effective_alignment = GTextAlignmentRight;
10151168
}
10161169
}

0 commit comments

Comments
 (0)