Skip to content
This repository was archived by the owner on Apr 29, 2021. It is now read-only.

Commit 5bb0b59

Browse files
author
Yuncong Zhang
committed
Merge branch 'yczhang' into gallery
2 parents 4f58791 + e64cbb5 commit 5bb0b59

File tree

1 file changed

+26
-1
lines changed

1 file changed

+26
-1
lines changed

Runtime/ui/txt/wordbreaker.cs

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,14 +93,23 @@ int _findNextBoundaryNormal() {
9393
if (this._current == this._size) {
9494
return -1;
9595
}
96-
96+
9797
WordSeparate.characterType preType = WordSeparate.classifyChar(this._text, this._current + this._offset);
98+
bool preBoundaryChar = isBoundaryChar(this._text[this._current + this._offset]);
9899
this._current++;
100+
if (preBoundaryChar) {
101+
return this._current;
102+
}
103+
99104
for (; this._current < this._size; ++this._current) {
100105
this.nextUntilCodePoint();
101106
if (this._current >= this._size) {
102107
break;
103108
}
109+
110+
if (isBoundaryChar(this._text[this._current + this._offset])) {
111+
break;
112+
}
104113
var currentType = WordSeparate.classifyChar(this._text, this._current + this._offset);
105114
if ((currentType == WordSeparate.characterType.WhiteSpace)
106115
!= (preType == WordSeparate.characterType.WhiteSpace)) {
@@ -152,6 +161,22 @@ public static uint getSupplementary(uint lead, uint trail) {
152161
return (char) (((uint) (lead) << 10) + (uint) (trail - U16_SURROGATE_OFFSET));
153162
}
154163

164+
/// <summary>
/// Reports whether a single UTF-16 code unit counts as a boundary character.
/// </summary>
/// <param name="code">The UTF-16 code unit to classify.</param>
/// <returns>
/// <c>true</c> when <paramref name="code"/> is punctuation according to
/// <see cref="char.IsPunctuation(char)"/>, or falls in U+4E00..U+9FFF or
/// U+3040..U+30FF; otherwise <c>false</c>.
/// </returns>
public static bool isBoundaryChar(char code) {
    // CJK Unified Ideographs block, see https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
    bool inCjkIdeographs = 0x4E00 <= code && code <= 0x9FFF;

    // Hiragana or Katakana, range taken from
    // https://social.msdn.microsoft.com/Forums/en-US/0d1888de-9745-4dd1-80fd-d3c29d3e381d/checking-for-japanese-characters-in-a-string?forum=vcmfcatl
    bool inKana = 0x3040 <= code && code <= 0x30FF;

    return char.IsPunctuation(code) || inCjkIdeographs || inKana;
}
179+
155180
void nextUntilCodePoint() {
156181
while (this._current < this._size
157182
&& (char.IsLowSurrogate(this._text[this._current + this._offset])

0 commit comments

Comments
 (0)