diff --git a/TeXmacs/tests/tmu/209_9.tmu b/TeXmacs/tests/tmu/209_9.tmu new file mode 100644 index 0000000000..f36ea0b188 --- /dev/null +++ b/TeXmacs/tests/tmu/209_9.tmu @@ -0,0 +1,36 @@ +> + +> + +<\body> + code模式示例: + + <\cpp-code> + > + + + python-code模式示例: + + <\python-code> + + + + cpp-code模式示例: + + <\cpp-code> + + + + r-code模式示例: + + <\r-code> + + + + +<\initial> + <\collection> + + + + diff --git a/devel/209_9.md b/devel/209_9.md new file mode 100644 index 0000000000..ed50b24521 --- /dev/null +++ b/devel/209_9.md @@ -0,0 +1,47 @@ +# 209_9 修复代码模式的中文换行显示问题 + +## 如何测试 +1. 启动 Mogan / TeXmacs +2. 插入以下任意代码环境之一(或其他支持的代码环境): + - `\code` + - `\python-code` + - `\cpp-code` + - `\r-code` + +3. 输入一行**足够长**、包含**中文字符**的内容,例如: +```tex +z中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中中 +``` +4. 在开头输入字符以触发不同位置的自动换行 + +期望结果: + +- 中文字符不会被拆分 + +- 不再出现 <#XXXX> 或在 < 处断裂的异常显示 + +- 中文字符要么完整出现在上一行,要么完整出现在下一行 + +测试文档: TeXmacs/tests/tmu/209_9.tmu + +## 2026/1/21 +### What +修复在代码模式下(包括 \code、\python-code、\cpp-code 等环境) +中文字符在自动换行时被错误拆分、显示为 <#XXXX> 的问题。 + +### Why +代码模式在自动换行时直接按字符串下标切分字符串, +当断行位置落在 <#XXXX> 内部时,会破坏内部转义结构, +最终导致渲染失败并显示为 <#XXXX>。 + +关联issue #2605 + +### How +在 verb_language_rep::hyphenate 与 prog_language_rep::hyphenate 中 +引入断行边界保护机制: + +- 将 <#...> 内部转义序列视为不可拆分的原子 + +- 若断行位置落在原子内部,则向左吸附到最近的合法边界 + +- 仅在合法边界处对字符串进行切分 diff --git a/src/System/Language/code_wrap.hpp b/src/System/Language/code_wrap.hpp new file mode 100644 index 0000000000..325ca2768c --- /dev/null +++ b/src/System/Language/code_wrap.hpp @@ -0,0 +1,46 @@ +#ifndef TM_CODE_WRAP_HPP +#define TM_CODE_WRAP_HPP + +#include "basic.hpp" +#include "string.hpp" +// Protect TeXmacs internal escape sequences like "<#4E2D>" (CJK, etc.) +// from being split during automatic line wrapping in code/prog environments. +// +// NOTE: +// - We only protect "<#...>" to avoid affecting normal code like "". 
+// - This is a last-resort safety net: even if the line breaker proposes an +// invalid split position, we snap it to a valid boundary here. +static inline int /* Returns the index just past the atom that starts at s[i]: one past the closing '>' for a complete "<#...>" sequence, otherwise i plus one; an out-of-range i is returned unchanged. NOTE(review): any other '<'-led sequence is stepped through byte by byte, so only "<#...>" is kept whole -- confirm no other multi-byte sequence needs protecting. */ +tm_atom_end_for_code_wrap (string s, int i) { + int n= N (s); + if (i < 0 || i >= n) return i; /* index outside [0, n): handed back as is */ + if (s[i] != '<') return i + 1; /* ordinary byte: one-byte atom */ + + if (i + 1 >= n || s[i + 1] != '#') return i + 1; /* '<' that does not open "<#": one-byte atom */ + + int j= i + 2; /* first byte after "<#" */ + while (j < n && s[j] != '>') + j++; + if (j < n && s[j] == '>') return j + 1; /* closing '>' found at j: the atom ends right after it */ + + return i + 1; /* no closing '>' before the end of s: treat '<' as a one-byte atom */ +} + +static inline int /* Returns the largest atom boundary of s that does not exceed after, scanning atoms from index 0 with tm_atom_end_for_code_wrap; yields 0 for after <= 0 and n for after >= n. NOTE(review): the result is also 0 when after falls inside a "<#...>" that starts at index 0 -- confirm callers accept an empty left part. */ +tm_snap_after_boundary_for_code_wrap (string s, int after) { + int n= N (s); + if (after <= 0) return 0; + if (after >= n) return n; + + int i = 0; + int last= 0; /* last boundary known to be <= after */ + while (i < n) { + int j= tm_atom_end_for_code_wrap (s, i); + if (j > after) break; /* the next boundary would pass after: keep the previous one */ + last= j; + i = j; + } + return last; +} + +#endif // TM_CODE_WRAP_HPP diff --git a/src/System/Language/prog_language.cpp b/src/System/Language/prog_language.cpp index a971b257dd..cc04be6615 100644 --- a/src/System/Language/prog_language.cpp +++ b/src/System/Language/prog_language.cpp @@ -11,6 +11,7 @@ ******************************************************************************/ #include "analyze.hpp" +#include "code_wrap.hpp" #include "convert.hpp" #include "converter.hpp" #include "cork.hpp" @@ -293,8 +294,9 @@ prog_language_rep::get_hyphens (string s) { void prog_language_rep::hyphenate (string s, int after, string& left, string& right) { - left = s (0, after); - right= s (after, N (s)); + int a= tm_snap_after_boundary_for_code_wrap (s, after); /* split position moved back to an atom boundary so a complete "<#...>" sequence is never cut */ + left = s (0, a); + right= s (a, N (s)); } string diff --git a/src/System/Language/verb_language.cpp b/src/System/Language/verb_language.cpp index 2ed1714e69..a31828a5c8 100644 --- a/src/System/Language/verb_language.cpp +++ b/src/System/Language/verb_language.cpp @@ -10,6 +10,7 @@ ******************************************************************************/ #include "analyze.hpp" +#include "code_wrap.hpp" #include "impl_language.hpp" #include "observers.hpp" #include "packrat.hpp" @@ -64,8 +65,9
@@ verb_language_rep::get_hyphens (string s) { void verb_language_rep::hyphenate (string s, int after, string& left, string& right) { - left = s (0, after); - right= s (after, N (s)); + int a= tm_snap_after_boundary_for_code_wrap (s, after); /* split position moved back to an atom boundary so a complete "<#...>" sequence is never cut */ + left = s (0, a); + right= s (a, N (s)); } string