|
43 | 43 |
|
44 | 44 | static bool prv_char_iter_next_start_of_word(Iterator* char_iter); |
45 | 45 |
|
| 46 | +//! Returns true if cp is in one of the four ASCII punctuation/symbol ranges or anywhere in U+2000-U+206F; RTL detection skips such codepoints |
| 47 | +static bool prv_codepoint_is_punctuation(Codepoint cp) { |
| 48 | + // ASCII punctuation and symbols: the printable ranges around the digits and letters |
| 49 | + if ((cp >= 0x21 && cp <= 0x2F) || // ! " # $ % & ' ( ) * + , - . / |
| 50 | + (cp >= 0x3A && cp <= 0x40) || // : ; < = > ? @ |
| 51 | + (cp >= 0x5B && cp <= 0x60) || // [ \ ] ^ _ ` |
| 52 | + (cp >= 0x7B && cp <= 0x7E)) { // { | } ~ |
| 53 | + return true; |
| 54 | + } |
| 55 | + // General Punctuation block (U+2000-U+206F): dashes, quotes, etc. NOTE(review): the whole block is matched, which also covers its space and directional-mark codepoints (e.g. U+200E/U+200F) - confirm this is intended |
| 56 | + if (cp >= 0x2000 && cp <= 0x206F) { |
| 57 | + return true; |
| 58 | + } |
| 59 | + return false; |
| 60 | +} |
| 61 | + |
| 62 | +//! Report whether the first codepoint in [start, end) that is not skipped is RTL (right-to-left) |
| 63 | +//! Skips leading spaces, newlines, zero-width codepoints and punctuation; returns false for a NULL or empty range, when a NUL byte or end is reached first, or when utf8_peek_codepoint yields 0 or a NULL next pointer |
| 64 | +static bool prv_utf8_starts_with_rtl(const utf8_t *start, const utf8_t *end) { |
| 65 | + if (start == NULL || end == NULL || start >= end) { |
| 66 | + return false; |
| 67 | + } |
| 68 | + |
| 69 | + utf8_t *ptr = (utf8_t *)start; |
| 70 | + while (ptr < end && *ptr != '\0') { |
| 71 | + utf8_t *next = NULL; |
| 72 | + Codepoint cp = utf8_peek_codepoint(ptr, &next); |
| 73 | + if (cp == 0 || next == NULL) { |
| 74 | + break; |
| 75 | + } |
| 76 | + // Skip codepoints that do not decide direction here: space, newline, zero-width, punctuation |
| 77 | + if (cp == SPACE_CODEPOINT || cp == NEWLINE_CODEPOINT || |
| 78 | + codepoint_is_zero_width(cp) || prv_codepoint_is_punctuation(cp)) { |
| 79 | + ptr = next; |
| 80 | + continue; |
| 81 | + } |
| 82 | + // First codepoint that was not skipped (digits included, not only letters) decides the result |
| 83 | + return codepoint_is_rtl(cp); |
| 84 | + } |
| 85 | + return false; |
| 86 | +} |
| 87 | + |
46 | 88 | // PBL-23045 Eventually remove perimeter debugging |
47 | 89 | void graphics_text_perimeter_debugging_enable(bool enable) { |
48 | 90 | app_state_set_text_perimeter_debugging_enabled(enable); |
@@ -498,95 +540,206 @@ utf8_t* walk_line(GContext* ctx, Line* line, const TextBoxParams* const text_box |
498 | 540 | return NULL; |
499 | 541 | } |
500 | 542 |
|
501 | | - // RTL support: check if this line contains RTL characters and reverse if rendering |
502 | | - // Arabic text also needs shaping (connecting letters) BEFORE reversal |
503 | | - // Use smaller buffers to reduce stack usage (max 16 codepoints * 4 bytes = 64) |
504 | | - utf8_t shaped_buffer[64]; // Buffer for Arabic shaping |
505 | | - utf8_t rtl_buffer[64]; // Buffer for RTL reversal |
506 | | - const utf8_t *render_start = line->start; |
507 | | - bool is_rtl = false; |
| 543 | + // RTL support: segment-based rendering for mixed RTL/LTR text |
| 544 | + // Each RTL segment is reversed individually, LTR segments render normally |
508 | 545 | bool is_rendering = (char_visitor_cb == render_chars_char_visitor_cb); |
509 | 546 |
|
| 547 | + // For segment-based RTL rendering during render pass |
510 | 548 | if (is_rendering && line->start != NULL && text_box_params->utf8_bounds != NULL && |
511 | 549 | text_box_params->utf8_bounds->end != NULL && |
512 | | - text_box_params->utf8_bounds->end > line->start) { |
513 | | - // Find line end by calculating byte length from width |
514 | | - // We need to find where the line actually ends |
515 | | - utf8_t *line_end = (utf8_t *)text_box_params->utf8_bounds->end; |
| 550 | + text_box_params->utf8_bounds->end > line->start && |
| 551 | + utf8_contains_rtl(line->start, text_box_params->utf8_bounds->end)) { |
516 | 552 |
|
517 | | - // Check if this line contains RTL text |
518 | | - if (utf8_contains_rtl(line->start, line_end)) { |
519 | | - // Calculate the actual line length to reverse |
520 | | - // Use the line width to determine how much text fits |
521 | | - size_t src_len = 0; |
522 | | - utf8_t *ptr = (utf8_t *)line->start; |
523 | | - int width_so_far = 0; |
524 | | - |
525 | | - while (ptr < line_end && *ptr != '\0' && *ptr != '\n') { |
526 | | - utf8_t *next = NULL; |
527 | | - Codepoint cp = utf8_peek_codepoint(ptr, &next); |
528 | | - if (cp == 0 || next == NULL) { |
| 553 | + // Render using segment-based approach |
| 554 | + utf8_t *ptr = (utf8_t *)line->start; |
| 555 | + utf8_t *line_end = (utf8_t *)text_box_params->utf8_bounds->end; |
| 556 | + int walked_width_px = 0; |
| 557 | + utf8_t* last_visited_char = NULL; |
| 558 | + |
| 559 | + while (ptr < line_end && *ptr != '\0' && *ptr != '\n' && |
| 560 | + walked_width_px + suffix_width_px <= available_horiz_px) { |
| 561 | + |
| 562 | + // Find segment start and determine if RTL |
| 563 | + utf8_t *segment_start = ptr; |
| 564 | + utf8_t *next = NULL; |
| 565 | + Codepoint first_cp = utf8_peek_codepoint(ptr, &next); |
| 566 | + if (first_cp == 0 || next == NULL) break; |
| 567 | + |
| 568 | + // Skip leading punctuation/spaces to determine segment type |
| 569 | + bool segment_is_rtl = false; |
| 570 | + utf8_t *check_ptr = ptr; |
| 571 | + while (check_ptr < line_end && *check_ptr != '\0' && *check_ptr != '\n') { |
| 572 | + utf8_t *check_next = NULL; |
| 573 | + Codepoint check_cp = utf8_peek_codepoint(check_ptr, &check_next); |
| 574 | + if (check_cp == 0 || check_next == NULL) break; |
| 575 | + if (!prv_codepoint_is_punctuation(check_cp) && |
| 576 | + check_cp != SPACE_CODEPOINT && !codepoint_is_zero_width(check_cp)) { |
| 577 | + segment_is_rtl = codepoint_is_rtl(check_cp); |
529 | 578 | break; |
530 | 579 | } |
| 580 | + check_ptr = check_next; |
| 581 | + } |
| 582 | + |
| 583 | + // Collect segment (until we hit opposite script type or end) |
| 584 | + utf8_t *segment_end = ptr; |
| 585 | + utf8_t *segment_last_char = ptr; // Track start of last character in segment |
| 586 | + int segment_width_px = 0; // Track width within this segment |
| 587 | + while (segment_end < line_end && *segment_end != '\0' && *segment_end != '\n') { |
| 588 | + utf8_t *seg_next = NULL; |
| 589 | + Codepoint seg_cp = utf8_peek_codepoint(segment_end, &seg_next); |
| 590 | + if (seg_cp == 0 || seg_next == NULL) break; |
| 591 | + |
| 592 | + // Check if this character changes the segment type |
| 593 | + if (!prv_codepoint_is_punctuation(seg_cp) && |
| 594 | + seg_cp != SPACE_CODEPOINT && !codepoint_is_zero_width(seg_cp)) { |
| 595 | + bool char_is_rtl = codepoint_is_rtl(seg_cp); |
| 596 | + if (char_is_rtl != segment_is_rtl) { |
| 597 | + break; // End of segment |
| 598 | + } |
| 599 | + } |
| 600 | + |
| 601 | + // For RTL segments: don't include trailing spaces before LTR text |
| 602 | + // This prevents the space from being reversed to the wrong position |
| 603 | + if (segment_is_rtl && seg_cp == SPACE_CODEPOINT) { |
| 604 | + // Look ahead to see if next non-space char is LTR |
| 605 | + utf8_t *look_ptr = seg_next; |
| 606 | + while (look_ptr < line_end && *look_ptr != '\0' && *look_ptr != '\n') { |
| 607 | + utf8_t *look_next = NULL; |
| 608 | + Codepoint look_cp = utf8_peek_codepoint(look_ptr, &look_next); |
| 609 | + if (look_cp == 0 || look_next == NULL) break; |
| 610 | + if (look_cp != SPACE_CODEPOINT && !codepoint_is_zero_width(look_cp) && |
| 611 | + !prv_codepoint_is_punctuation(look_cp)) { |
| 612 | + // Found a letter - if it's LTR, end segment before the space |
| 613 | + if (!codepoint_is_rtl(look_cp)) { |
| 614 | + goto end_segment; // Break out of collection loop |
| 615 | + } |
| 616 | + break; |
| 617 | + } |
| 618 | + look_ptr = look_next; |
| 619 | + } |
| 620 | + } |
| 621 | + |
| 622 | + // Check width constraint (must account for walked width + accumulated segment width) |
531 | 623 | int glyph_width = prv_codepoint_get_horizontal_advance(&ctx->font_cache, |
532 | | - text_box_params->font, cp); |
533 | | - if (width_so_far + glyph_width > available_horiz_px) { |
| 624 | + text_box_params->font, seg_cp); |
| 625 | + if (walked_width_px + segment_width_px + glyph_width + suffix_width_px > available_horiz_px) { |
534 | 626 | break; |
535 | 627 | } |
536 | | - width_so_far += glyph_width; |
537 | | - ptr = next; |
538 | | - } |
539 | | - src_len = ptr - line->start; |
540 | 628 |
|
541 | | - // Limit src_len to fit in our buffer (with room for UTF-8 expansion) |
542 | | - if (src_len > sizeof(rtl_buffer) - 4) { |
543 | | - src_len = sizeof(rtl_buffer) - 4; |
| 629 | + segment_width_px += glyph_width; |
| 630 | + segment_last_char = segment_end; // Track this as the last character before advancing |
| 631 | + segment_end = seg_next; |
544 | 632 | } |
| 633 | + end_segment: |
| 634 | + |
| 635 | + size_t segment_len = segment_end - segment_start; |
| 636 | + if (segment_len == 0) break; |
| 637 | + |
| 638 | + // Render the segment |
| 639 | + if (segment_is_rtl) { |
| 640 | + // RTL segment: shape Arabic if needed, then reverse and render |
| 641 | + utf8_t shaped_buffer[64]; |
| 642 | + utf8_t rtl_buffer[64]; |
| 643 | + const utf8_t *to_render = segment_start; |
| 644 | + size_t render_len = segment_len; |
| 645 | + |
| 646 | + // Limit to buffer size |
| 647 | + if (render_len > sizeof(rtl_buffer) - 4) { |
| 648 | + render_len = sizeof(rtl_buffer) - 4; |
| 649 | + } |
545 | 650 |
|
546 | | - if (src_len > 0) { |
547 | | - // Step 1: Shape Arabic text first (connect letters based on context) |
548 | | - // This must happen BEFORE RTL reversal to analyze letter positions correctly |
549 | | - const utf8_t *to_reverse = line->start; |
550 | | - size_t to_reverse_len = src_len; |
551 | | - |
552 | | - if (utf8_contains_arabic(line->start, ptr)) { |
553 | | - size_t shaped_len = arabic_shape_text(line->start, src_len, |
| 651 | + // Shape Arabic text first |
| 652 | + if (utf8_contains_arabic(segment_start, segment_end)) { |
| 653 | + size_t shaped_len = arabic_shape_text(segment_start, render_len, |
554 | 654 | shaped_buffer, sizeof(shaped_buffer) - 1); |
555 | 655 | if (shaped_len > 0) { |
556 | 656 | shaped_buffer[shaped_len] = '\0'; |
557 | | - to_reverse = shaped_buffer; |
558 | | - to_reverse_len = shaped_len; |
| 657 | + to_render = shaped_buffer; |
| 658 | + render_len = shaped_len; |
559 | 659 | } |
560 | 660 | } |
561 | 661 |
|
562 | | - // Step 2: Then reverse for RTL display |
563 | | - size_t reversed_len = utf8_reverse_for_rtl(to_reverse, to_reverse_len, |
| 662 | + // Reverse for RTL display |
| 663 | + size_t reversed_len = utf8_reverse_for_rtl(to_render, render_len, |
564 | 664 | rtl_buffer, sizeof(rtl_buffer) - 1); |
565 | 665 | if (reversed_len > 0) { |
566 | 666 | rtl_buffer[reversed_len] = '\0'; |
567 | | - render_start = rtl_buffer; |
568 | | - is_rtl = true; |
| 667 | + |
| 668 | + // Render reversed segment |
| 669 | + utf8_t *rptr = rtl_buffer; |
| 670 | + while (*rptr != '\0') { |
| 671 | + utf8_t *rnext = NULL; |
| 672 | + Codepoint rcp = utf8_peek_codepoint(rptr, &rnext); |
| 673 | + if (rcp == 0 || rnext == NULL) break; |
| 674 | + |
| 675 | + int glyph_width = prv_codepoint_get_horizontal_advance(&ctx->font_cache, |
| 676 | + text_box_params->font, rcp); |
| 677 | + |
| 678 | + GRect cursor = { |
| 679 | + .origin = line->origin, |
| 680 | + .size.w = glyph_width, |
| 681 | + .size.h = fonts_get_font_height(text_box_params->font) |
| 682 | + }; |
| 683 | + cursor.origin.x += walked_width_px; |
| 684 | + |
| 685 | + if (!codepoint_is_zero_width(rcp)) { |
| 686 | + render_glyph(ctx, rcp, text_box_params->font, cursor); |
| 687 | + } |
| 688 | + |
| 689 | + walked_width_px += glyph_width; |
| 690 | + rptr = rnext; |
| 691 | + } |
| 692 | + } |
| 693 | + } else { |
| 694 | + // LTR segment: render normally |
| 695 | + utf8_t *sptr = segment_start; |
| 696 | + while (sptr < segment_end) { |
| 697 | + utf8_t *snext = NULL; |
| 698 | + Codepoint scp = utf8_peek_codepoint(sptr, &snext); |
| 699 | + if (scp == 0 || snext == NULL) break; |
| 700 | + |
| 701 | + int glyph_width = prv_codepoint_get_horizontal_advance(&ctx->font_cache, |
| 702 | + text_box_params->font, scp); |
| 703 | + |
| 704 | + GRect cursor = { |
| 705 | + .origin = line->origin, |
| 706 | + .size.w = glyph_width, |
| 707 | + .size.h = fonts_get_font_height(text_box_params->font) |
| 708 | + }; |
| 709 | + cursor.origin.x += walked_width_px; |
| 710 | + |
| 711 | + if (!codepoint_is_zero_width(scp)) { |
| 712 | + render_glyph(ctx, scp, text_box_params->font, cursor); |
| 713 | + } |
| 714 | + |
| 715 | + walked_width_px += glyph_width; |
| 716 | + last_visited_char = sptr; |
| 717 | + sptr = snext; |
569 | 718 | } |
570 | 719 | } |
| 720 | + |
| 721 | + ptr = segment_end; |
| 722 | + last_visited_char = segment_last_char; |
571 | 723 | } |
| 724 | + |
| 725 | + // Handle suffix if present |
| 726 | + if (line->suffix_codepoint) { |
| 727 | + GRect cursor = { |
| 728 | + .origin = line->origin, |
| 729 | + .size.w = suffix_width_px, |
| 730 | + .size.h = fonts_get_font_height(text_box_params->font) |
| 731 | + }; |
| 732 | + cursor.origin.x += walked_width_px; |
| 733 | + render_glyph(ctx, line->suffix_codepoint, text_box_params->font, cursor); |
| 734 | + } |
| 735 | + |
| 736 | + return last_visited_char; |
572 | 737 | } |
573 | 738 |
|
574 | | - // Set up iterator - use reversed buffer for RTL rendering |
| 739 | + // Standard rendering path (no RTL or not rendering) |
575 | 740 | Iterator char_iter; |
576 | 741 | CharIterState char_iter_state; |
577 | | - TextBoxParams rtl_text_box_params; |
578 | | - Utf8Bounds rtl_bounds; |
579 | | - |
580 | | - if (is_rtl) { |
581 | | - // Create temporary bounds and params for the reversed text |
582 | | - rtl_bounds.start = (utf8_t *)render_start; |
583 | | - rtl_bounds.end = (utf8_t *)render_start + strlen((const char *)render_start); |
584 | | - rtl_text_box_params = *text_box_params; |
585 | | - rtl_text_box_params.utf8_bounds = &rtl_bounds; |
586 | | - char_iter_init(&char_iter, &char_iter_state, &rtl_text_box_params, (utf8_t *)render_start); |
587 | | - } else { |
588 | | - char_iter_init(&char_iter, &char_iter_state, text_box_params, line->start); |
589 | | - } |
| 742 | + char_iter_init(&char_iter, &char_iter_state, text_box_params, line->start); |
590 | 743 | Utf8IterState* utf8_iter_state = (Utf8IterState*) &char_iter_state.utf8_iter_state; |
591 | 744 |
|
592 | 745 | bool is_newline_as_space = text_box_params->overflow_mode == GTextOverflowModeFill; |
@@ -1008,9 +1161,9 @@ static void prv_line_justify(Line* line, const TextBoxParams* const text_box_par |
1008 | 1161 | // Determine effective alignment - RTL text defaults to right alignment |
1009 | 1162 | GTextAlignment effective_alignment = text_box_params->alignment; |
1010 | 1163 |
|
1011 | | - // If alignment is left (default) and text contains RTL, switch to right |
| 1164 | + // If alignment is left (default) and text starts with RTL, switch to right |
1012 | 1165 | if (effective_alignment == GTextAlignmentLeft && line->start != NULL) { |
1013 | | - if (utf8_contains_rtl(line->start, text_box_params->utf8_bounds->end)) { |
| 1166 | + if (prv_utf8_starts_with_rtl(line->start, text_box_params->utf8_bounds->end)) { |
1014 | 1167 | effective_alignment = GTextAlignmentRight; |
1015 | 1168 | } |
1016 | 1169 | } |
|
0 commit comments