Skip to content

Word Wrapping in Mixed-Language Markdown Rendering And Quick Fix (CalcWordWrapPosition) #9066

@xuboying

Description

@xuboying

Version/Branch of Dear ImGui:

version 1f7f1f5, tag: v1.92.2b-docking

Back-ends:

imgui_impl_glfw imgui_impl_opengl3

Compiler, OS:

Windows 11 + MSVC 2022

Full config/build information:

# Name of the executable target; also used as the prefix of the source-list variable below.
set(APP_NAME basic_imgui)

# Collect every .cpp directly under the Dear ImGui checkout plus the two back-end
# implementation files (GLFW platform layer, OpenGL3 renderer).
# NOTE(review): THIRDPP_DIR is not defined in this snippet -- presumably set by a parent CMakeLists.
file(GLOB IMGUI_SRC
    ${THIRDPP_DIR}/imgui/*.cpp
    ${THIRDPP_DIR}/imgui/backends/imgui_impl_glfw.cpp
    ${THIRDPP_DIR}/imgui/backends/imgui_impl_opengl3.cpp

)

# Application sources: stored in the variable "<APP_NAME>_SOURCES" (here basic_imgui_SOURCES).
file(
    GLOB "${APP_NAME}_SOURCES"
    ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
)

# Append the DPI manifest to the application source list.
# NOTE(review): APP_DIR is not defined in this snippet -- confirm it is set by the including project.
list(APPEND ${APP_NAME}_SOURCES
    ${APP_DIR}/dpi.manifest # can be omitted
)

# Build the application sources and the Dear ImGui sources into one executable.
add_executable(
    ${APP_NAME}
    ${${APP_NAME}_SOURCES}
    ${IMGUI_SRC}
)

# Preprocessor definitions applied to this target only (PRIVATE), which includes the
# Dear ImGui sources compiled above.
# NOTE(review): presumably these enable the FreeType/plutosvg font loader and 32-bit ImWchar -- verify against imconfig.h.
target_compile_definitions(
    ${APP_NAME} PRIVATE
    IMGUI_ENABLE_FREETYPE_PLUTOSVG
    IMGUI_USE_WCHAR32
)

# Make <imgui.h> and the back-end headers reachable from main.cpp.
target_include_directories(
    ${APP_NAME} PRIVATE
    ${THIRDPP_DIR}/imgui
    ${THIRDPP_DIR}/imgui/backends
)
# Link against the plutosvg, glfw and glad targets/libraries.
# NOTE(review): these are expected to be provided elsewhere in the build -- not shown here.
target_link_libraries(${APP_NAME} PRIVATE
    plutosvg
    glfw
    glad
)

Details:

I'm building a Markdown text renderer and using the RenderTextWrapped function from imgui_markdown. However, I'm encountering unsatisfactory wrapping behavior when rendering paragraphs that mix Chinese and English, or Emoji and English.

Problem Description

In mixed-language paragraphs, the renderer wraps too early—typically at spaces between English words—even when the line could visually accommodate more characters. This results in suboptimal layout, especially for Chinese or Emoji-rich content.

Example (before fix):

Before Fix

After debugging, I traced the issue to ImFont::CalcWordWrapPosition, which is internally used by RenderTextWrapped. The function uses an inside_word heuristic to determine wrap points, but it treats all characters—including Asian characters and emoji—as part of a word. This causes premature wrapping when these characters are adjacent to English text.

Proposed Fix

To address this, I made a minimal patch with the following goals:

  1. Preserve existing behavior for all current use cases.
  2. Introduce a new concept: char_is_asian_or_emoj. These characters are treated similarly to space characters for wrapping logic. Specifically, if the previous character is Asian or Emoji, prev_word_end is updated to allow wrapping after it.
  3. Improve punctuation handling: The original logic doesn't handle punctuation before words well. I applied similar logic to ensure punctuation doesn't interfere with wrap decisions.

Example (after fix):

After Fix

Limitations

Chinese punctuation is not yet handled and may still cause awkward breaks.

Code is minimally tested and may contain redundant logic or edge cases not yet covered.


 git diff -- .\imgui_draw.cpp
diff --git a/imgui_draw.cpp b/imgui_draw.cpp
index 63f14a47..f0c1e8a3 100644
--- a/imgui_draw.cpp
+++ b/imgui_draw.cpp
@@ -5319,7 +5319,7 @@ ImFontBaked* ImFontAtlasBakedGetOrAdd(ImFontAtlas* atlas, ImFont* font, float fo
     ImFontBaked* baked = *p_baked_in_map;
     if (baked != NULL)
     {
-        IM_ASSERT(baked->Size == font_size && baked->ContainerFont == font && baked->BakedId == baked_id);
+        // IM_ASSERT(baked->Size == font_size && baked->ContainerFont == font && baked->BakedId == baked_id);
         return baked;
     }
 
@@ -5352,10 +5352,11 @@ static inline const char* CalcWordWrapNextLineStartA(const char* text, const cha
         text++;
     return text;
 }
-
+#define PATCHWRAP  // for testing purpose only
 // Simple word-wrapping for English, not full-featured. Please submit failing cases!
 // This will return the next location to wrap from. If no wrapping if necessary, this will fast-forward to e.g. text_end.
 // FIXME: Much possible improvements (don't cut things like "word !", "word!!!" but cut within "word,,,,", more sensible support for punctuations, support for Unicode punctuations, etc.)
+#ifdef PATCHWRAP  // for testing purpose only
 const char* ImFont::CalcWordWrapPosition(float size, const char* text, const char* text_end, float wrap_width)
 {
     // For references, possible wrap point marked with ^
@@ -5381,18 +5382,28 @@ const char* ImFont::CalcWordWrapPosition(float size, const char* text, const cha
     const char* word_end = text;
     const char* prev_word_end = NULL;
     bool inside_word = true;
-
     const char* s = text;
     IM_ASSERT(text_end != NULL);
+    bool prev_char_is_asian_or_emoj = false;
+    bool prev_char_is_not_punctuation = false;
     while (s < text_end)
     {
+        bool current_char_is_asian_or_emoj = false;
         unsigned int c = (unsigned int)*s;
         const char* next_s;
         if (c < 0x80)
+        {
             next_s = s + 1;
+            current_char_is_asian_or_emoj = false;
+        }
         else
+        {
             next_s = s + ImTextCharFromUtf8(&c, s, text_end);
-
+            if (c > 0x4e00) {
+                current_char_is_asian_or_emoj = true;
+                inside_word = false;
+            }
+        }
         if (c < 32)
         {
             if (c == '\n')
@@ -5408,7 +5419,7 @@ const char* ImFont::CalcWordWrapPosition(float size, const char* text, const cha
                 continue;
             }
         }
-
+        bool const current_char_is_not_punctuation = (c != '.' && c != ',' && c != ';' && c != '!' && c != '?' && c != '\"' && c != 0x3001 && c != 0x3002);     
         // Optimized inline version of 'float char_width = GetCharAdvance((ImWchar)c);'
         float char_width = (c < (unsigned int)baked->IndexAdvanceX.Size) ? baked->IndexAdvanceX.Data[c] : -1.0f;
         if (char_width < 0.0f)
@@ -5416,7 +5427,7 @@ const char* ImFont::CalcWordWrapPosition(float size, const char* text, const cha

         if (ImCharIsBlankW(c))
         {
-            if (inside_word)
+            if (inside_word || prev_char_is_asian_or_emoj)
             {
                 line_width += blank_width;
                 blank_width = 0.0f;
@@ -5432,15 +5443,23 @@ const char* ImFont::CalcWordWrapPosition(float size, const char* text, const cha
             {
                 word_end = next_s;
             }
-            else
-            {
-                prev_word_end = word_end;
+            else {
+                if (prev_char_is_asian_or_emoj || ! prev_char_is_not_punctuation)
+                {
+                    prev_word_end = word_end = s;
+                    // word_end=s;
+                }
+                else
+                {
+                    prev_word_end = word_end;
+                }
                 line_width += word_width + blank_width;
                 word_width = blank_width = 0.0f;
             }

             // Allow wrapping after punctuation.
-            inside_word = (c != '.' && c != ',' && c != ';' && c != '!' && c != '?' && c != '\"' && c != 0x3001 && c != 0x3002);
+            inside_word = current_char_is_not_punctuation;
+
         }

         // We ignore blank width at the end of the line (they can be skipped)
@@ -5453,8 +5472,9 @@ const char* ImFont::CalcWordWrapPosition(float size, const char* text, const cha
         }

         s = next_s;
+        prev_char_is_asian_or_emoj = current_char_is_asian_or_emoj;
+        prev_char_is_not_punctuation = current_char_is_not_punctuation;
     }
-
     // Wrap_width is too small to fit anything. Force displaying 1 character to minimize the height discontinuity.
     // +1 may not be a character start point in UTF-8 but it's ok because caller loops use (text >= word_wrap_eol).
     if (s == text && text < text_end)
@@ -5462,6 +5482,11 @@ const char* ImFont::CalcWordWrapPosition(float size, const char* text, const cha
     return s;
 }

+
+
+
+#endif
+
 ImVec2 ImFont::CalcTextSizeA(float size, float max_width, float wrap_width, const char* text_begin, const char* text_end, const char** remaining)
 {
     if (!text_end)

for review

#define PATCHWRAP  // for testing purpose only
// Simple word-wrapping for English, not full-featured. Please submit failing cases!
// This will return the next location to wrap from. If no wrapping is necessary, this will fast-forward to e.g. text_end.
// FIXME: Much possible improvements (don't cut things like "word !", "word!!!" but cut within "word,,,,", more sensible support for punctuations, support for Unicode punctuations, etc.)
#ifdef PATCHWRAP  // for testing purpose only
// Compute where a line starting at 'text' should be wrapped so that it fits in 'wrap_width'.
// - size:       font size used to select the baked font data and to derive the width scale.
// - text:       start of the UTF-8 range to measure.
// - text_end:   end of the range (must not be NULL).
// - wrap_width: available width, in the same (scaled) units as 'size'.
// Returns a pointer in the range to wrap at. When the whole range fits this is the position the
// scan stopped at (text_end). When nothing fits, the position after the first character is
// returned so that callers always make progress.
const char* ImFont::CalcWordWrapPosition(float size, const char* text, const char* text_end, float wrap_width)
{
    // For references, possible wrap point marked with ^
    //  "aaa bbb, ccc,ddd. eee   fff. ggg!"
    //      ^    ^    ^   ^   ^__    ^    ^

    // List of hardcoded separators: .,;!?" plus U+3001 and U+3002 (ideographic comma / full stop)

    // Characters at or above U+4E00 (CJK ideographs, emoji, ...) are not treated as part of a word:
    // a wrap point is allowed both before and after each of them.

    // Skip extra blanks after a line returns (that includes not counting them in width computation)
    // e.g. "Hello    world" --> "Hello" "World"

    // Cut words that cannot possibly fit within one line.
    // e.g.: "The tropical fish" with ~5 characters worth of width --> "The tr" "opical" "fish"

    ImFontBaked* baked = GetFontBaked(size);
    const float scale = size / baked->Size;

    float line_width = 0.0f;
    float word_width = 0.0f;
    float blank_width = 0.0f;
    wrap_width /= scale; // We work with unscaled widths to avoid scaling every characters

    const char* word_end = text;
    const char* prev_word_end = NULL;
    bool inside_word = true;
    const char* s = text;
    IM_ASSERT(text_end != NULL);

    // State carried from the previously measured character of the current line.
    // 'prev_char_is_not_punctuation' starts as false so that the first character of a line
    // records its own position as the wrap point instead of reusing an older 'word_end'.
    bool prev_char_is_asian_or_emoj = false;
    bool prev_char_is_not_punctuation = false;
    while (s < text_end)
    {
        // Decode one codepoint (ASCII fast path, otherwise UTF-8).
        unsigned int c = (unsigned int)*s;
        const char* next_s;
        if (c < 0x80)
            next_s = s + 1;
        else
            next_s = s + ImTextCharFromUtf8(&c, s, text_end);

        // U+4E00 is the first CJK Unified Ideograph and must be included, hence '>=' and not '>'.
        // ASCII codepoints can never satisfy this test, so it is safe to evaluate for every character.
        const bool current_char_is_asian_or_emoj = (c >= 0x4e00);
        if (current_char_is_asian_or_emoj)
            inside_word = false;

        if (c < 32)
        {
            if (c == '\n')
            {
                // Explicit line break: restart all per-line state, including the "previous
                // character" flags, so that nothing leaks from the line that just ended.
                line_width = word_width = blank_width = 0.0f;
                inside_word = true;
                prev_char_is_asian_or_emoj = false;
                prev_char_is_not_punctuation = false;
                s = next_s;
                continue;
            }
            if (c == '\r')
            {
                s = next_s;
                continue;
            }
        }
        const bool current_char_is_not_punctuation = (c != '.' && c != ',' && c != ';' && c != '!' && c != '?' && c != '\"' && c != 0x3001 && c != 0x3002);

        // Optimized inline version of 'float char_width = GetCharAdvance((ImWchar)c);'
        float char_width = (c < (unsigned int)baked->IndexAdvanceX.Size) ? baked->IndexAdvanceX.Data[c] : -1.0f;
        if (char_width < 0.0f)
            char_width = BuildLoadGlyphGetAdvanceOrFallback(baked, c);

        if (ImCharIsBlankW(c))
        {
            // A blank directly after a word or after an asian/emoji character ends that run:
            // commit pending blanks to the line and remember this blank as the wrap point.
            if (inside_word || prev_char_is_asian_or_emoj)
            {
                line_width += blank_width;
                blank_width = 0.0f;
                word_end = s;
            }
            blank_width += char_width;
            inside_word = false;
        }
        else
        {
            word_width += char_width;
            if (inside_word)
            {
                word_end = next_s;
            }
            else
            {
                // Starting a new run. If the previous character was asian/emoji or punctuation,
                // the wrap point is right before the current character; otherwise it is the end
                // of the previous word.
                if (prev_char_is_asian_or_emoj || !prev_char_is_not_punctuation)
                    prev_word_end = word_end = s;
                else
                    prev_word_end = word_end;
                line_width += word_width + blank_width;
                word_width = blank_width = 0.0f;
            }

            // Allow wrapping after punctuation.
            inside_word = current_char_is_not_punctuation;
        }

        // We ignore blank width at the end of the line (they can be skipped)
        if (line_width + word_width > wrap_width)
        {
            // Words that cannot possibly fit within an entire line will be cut anywhere.
            if (word_width < wrap_width)
                s = prev_word_end ? prev_word_end : word_end;
            break;
        }

        s = next_s;
        prev_char_is_asian_or_emoj = current_char_is_asian_or_emoj;
        prev_char_is_not_punctuation = current_char_is_not_punctuation;
    }
    // Wrap_width is too small to fit anything. Force displaying 1 character to minimize the height discontinuity.
    // +1 may not be a character start point in UTF-8 but it's ok because caller loops use (text >= word_wrap_eol).
    if (s == text && text < text_end)
        return s + ImTextCountUtf8BytesFromChar(s, text_end);
    return s;
}




#endif

Screenshots/Video:

No response

Minimal, Complete and Verifiable Example code:

main.cpp

//
#include <string>
//
#include <GLFW/glfw3.h>
#include <imgui.h>
#include <imgui_impl_glfw.h>
#include <imgui_impl_opengl3.h>
#include <misc/freetype/imgui_freetype.h>

namespace
{

/// Registers the fonts used by the demo with the ImGui font atlas: Arial first, then
/// SimHei and Segoe UI Emoji added with MergeMode enabled.
/// @return the pointer returned by the last AddFontFromFileTTF call.
/// NOTE(review): the paths are hard-coded Windows font locations and the results of the
/// individual loads are not checked -- acceptable for a repro, confirm before reuse.
auto loadFont() -> ImFont *
{
    ImGuiIO &io = ImGui::GetIO();

    // Base font, loaded with a default-constructed config.
    ImFontConfig baseCfg;
    ImFont *font = io.Fonts->AddFontFromFileTTF(R"(C:\Windows\Fonts\arial.ttf)", 0.0F, &baseCfg);

    // One config shared by both merged fonts. The colour-loading flag is requested for both,
    // exactly as before; the previously declared but never used emoji config has been removed.
    ImFontConfig mergeCfg;
    mergeCfg.MergeMode = true;
    mergeCfg.FontLoaderFlags |= ImGuiFreeTypeLoaderFlags_LoadColor;
    font = io.Fonts->AddFontFromFileTTF(R"(C:\Windows\Fonts\SimHei.ttf)", 0.0F, &mergeCfg);
    font = io.Fonts->AddFontFromFileTTF(R"(C:\Windows\Fonts\seguiemj.ttf)", 0.0F, &mergeCfg);
    return font;
}

// based on https://github.com/enkisoftware/imgui_markdown/blob/main/imgui_markdown.h
/// Emits the text range [text_, text_end_) as a sequence of ImGui::TextUnformatted calls, one per
/// wrapped line, using the current font's word-wrap helper to find each line end.
/// @param text_     start of the text to render.
/// @param text_end_ one past the last byte of the text.
void RenderTextWrapped(const char *text_, const char *text_end_)
{
#if IMGUI_VERSION_NUM >= 19197
    const float fontSize = ImGui::GetFontSize();
#else
    const float scale = ImGui::GetIO().FontGlobalScale;
#endif
    // Single place that hides the API difference between ImGui versions.
    const auto findLineEnd = [&](const char *lineStart, float width) -> const char * {
#if IMGUI_VERSION_NUM >= 19197
        return ImGui::GetFont()->CalcWordWrapPosition(fontSize, lineStart, text_end_, width);
#else
        return ImGui::GetFont()->CalcWordWrapPositionA(scale, lineStart, text_end_, width);
#endif
    };

    // First line: measured against the width available at the current cursor position.
    float widthLeft = ImGui::GetContentRegionAvail().x;
    const char *endLine = findLineEnd(text_, widthLeft);
    ImGui::TextUnformatted(text_, endLine);

    // Following lines: the available width is queried once more after the first line was emitted.
    widthLeft = ImGui::GetContentRegionAvail().x;
    while (endLine < text_end_)
    {
        text_ = endLine;
        if (*text_ == ' ')
        {
            ++text_;
        } // skip a space at start of line
        if (text_ >= text_end_)
        {
            // The skipped space was the last byte: nothing is left to draw. Without this check
            // the "force progress" step below would move endLine past text_end_ and
            // TextUnformatted would read beyond the caller's range.
            break;
        }
        endLine = findLineEnd(text_, widthLeft);
        if (text_ == endLine)
        {
            // The wrap helper made no progress; advance by one byte so the loop terminates.
            endLine++;
        }
        ImGui::TextUnformatted(text_, endLine);
    }
}

std::string textdoc1 = R"(

..................................................word word

// ZH-CN and EN, no space in between

字文字文字字文字文字文字文字字文字文字字文字文字文字文字word word文字文字文字文字文字文字文字文字

// ZH-CN and EN, space in between

字文字文字字文字文字文字文字 word word 字文字文字文字文字文字文。

// Emoj and EN no space in between

😁😁😁😁😁😁😁word word😁😁😁😁😁😁😁😁😁

// Emoj and EN space in between

😁😁😁😁😁😁😁😁😁😁😁😁😁😁😁😁😁😁😁 word word 😁😁😁😁😁😁😁😁😁😁😁😁😁

---

### Multilingual Test Phrases (Word-Based Languages)

All the following languages are **word-based**. When wrapping text, the line break **must occur at the spaces between words**, and **must not split a word across two lines**.

**English:**  

The quick brown fox jumps over the lazy dog.

**French:**  

Le renard brun rapide saute par-dessus le chien paresseux.

**German:**  

Die schnell braune Fuchs springt über den faulen Hund.

**Spanish:**  

El zorro marrón rápido salta sobre el perro perezoso.

**Russian:**  

Быстрая коричневая лиса прыгает через ленивую собаку.

**Greek:**  

Η γρήγορη καφέ αλεπού πηδάει πάνω από το τεμπέλικο σκυλί.

**Dutch:**  

De snelle bruine vos springt over de luie hond.

)";

} // namespace
/// Entry point of the reproduction program: opens a GLFW/OpenGL window, sets up Dear ImGui with
/// the GLFW + OpenGL3 back-ends, and renders `textdoc1` through RenderTextWrapped every frame
/// until the window is closed.
/// @return 0 on normal exit, 1 when GLFW cannot be initialised or the window cannot be created.
auto main() -> int
{
    // Init GLFW
    if (glfwInit() == 0)
    {
        return 1;
    }
    const char *glsl_version = "#version 130";
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
    GLFWwindow *window = glfwCreateWindow(800, 600, "Minimal ImGui", nullptr, nullptr);
    if (window == nullptr)
    {
        // Without a window there is no GL context; bail out before handing a null
        // window to the ImGui back-ends.
        glfwTerminate();
        return 1;
    }
    glfwMakeContextCurrent(window);
    glfwSwapInterval(1); // Enable vsync

    // Init ImGui
    IMGUI_CHECKVERSION();
    ImGui::CreateContext();
    ImGuiIO &io = ImGui::GetIO();
    (void) io;
    io.IniFilename = nullptr; // no .ini file name is given to ImGui
    // ImGui::StyleColorsDark(); // or Light()

    ImGui_ImplGlfw_InitForOpenGL(window, true);
    ImGui_ImplOpenGL3_Init(glsl_version);
    auto *font = loadFont();

    // Main loop
    while (glfwWindowShouldClose(window) == 0)
    {
        glfwPollEvents();
        ImGui_ImplOpenGL3_NewFrame();
        ImGui_ImplGlfw_NewFrame();
        ImGui::NewFrame();

        // Place the ImGui window at the origin; its size is set from the framebuffer size
        // with ImGuiCond_Once.
        int display_w = 0;
        int display_h = 0;
        glfwGetFramebufferSize(window, &display_w, &display_h);
        ImGui::SetNextWindowPos(ImVec2(0, 0), ImGuiCond_Always);
        ImGui::SetNextWindowSize(ImVec2((float) display_w, (float) display_h), ImGuiCond_Once);

        ImGui::Begin("Full Window", nullptr);
        ImGui::PushFont(font, 20.0F);
        RenderTextWrapped(textdoc1.c_str(), textdoc1.c_str() + textdoc1.size());
        ImGui::PopFont();
        ImGui::End();
        // Render
        ImGui::Render();

        glViewport(0, 0, display_w, display_h);
        glClearColor(0.1f, 0.1f, 0.1f, 1.0f);
        glClear(GL_COLOR_BUFFER_BIT);
        ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
        glfwSwapBuffers(window);
    }

    // Cleanup
    ImGui_ImplOpenGL3_Shutdown();
    ImGui_ImplGlfw_Shutdown();
    ImGui::DestroyContext();
    glfwDestroyWindow(window);
    glfwTerminate();
    return 0;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions