Skip to content

Commit 4ac7de5

Browse files
committed
Update display_cell_tokenize
1 parent 156697b commit 4ac7de5

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

pythainlp/tokenize/core.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -757,6 +757,7 @@ def display_cell_tokenize(text: str) -> List[str]:
757757

758758
display_cells = []
759759
current_cell = ""
760+
text = text.replace("ำ", "ํา")
760761

761762
for char in text:
762763
if re.match(r"[\u0E31\u0E34-\u0E3A\u0E47-\u0E4E]", char):

tests/core/test_tokenize.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -608,7 +608,10 @@ def test_tcc_p(self):
608608

609609
def test_display_cell_tokenize(self):
610610
self.assertEqual(display_cell_tokenize(""), [])
611-
self.assertEqual(display_cell_tokenize("แม่น้ำอยู่ที่ไหน"), ["แ", "ม่", "น้ํ", "า", "อ", "ยู่", "ที่", "ไ", "ห", "น"])
612-
self.assertEqual(display_cell_tokenize("สวัสดี"), ["ส", "ว", "ั", "ส", "ด", "ี"])
611+
self.assertEqual(
612+
display_cell_tokenize("แม่น้ำอยู่ที่ไหน"),
613+
["แ", "ม่", "น้ํ", "า", "อ", "ยู่", "ที่", "ไ", "ห", "น"]
614+
)
615+
self.assertEqual(display_cell_tokenize("สวัสดี"), ['ส', 'วั', 'ส', 'ดี'])
613616
self.assertEqual(display_cell_tokenize("ทดสอบ"), ["ท", "ด", "ส", "อ", "บ"])
614617
self.assertEqual(display_cell_tokenize("ภาษาไทย"), ["ภ", "า", "ษ", "า", "ไ", "ท", "ย"])

0 commit comments

Comments
 (0)