Skip to content

Commit d281b78

Browse files
authored
Update test_util.py
1 parent 0cb4012 commit d281b78

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

tests/core/test_util.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -506,6 +506,25 @@ def test_normalize(self):
506506
self.assertEqual(normalize("กา าาะา"), "กาะา")
507507

508508
# remove repeating tone marks
509+
self.assertEqual(normalize("\u0e01\u0e48\u0e48"), "\u0e01\u0e48")
510+
511+
# collapse consecutive different tone marks, keeping only the last one
512+
self.assertEqual(normalize("\u0e01\u0e48\u0e49"), "\u0e01\u0e49")
513+
self.assertEqual(
514+
normalize("\u0e01\u0e48\u0e49\u0e48\u0e49"), "\u0e01\u0e49"
515+
)
516+
517+
# remove tone mark at the beginning of text
518+
self.assertEqual(remove_dangling("\u0e48\u0e01"), "\u0e01")
519+
self.assertEqual(remove_dangling("\u0e48\u0e48\u0e01"), "\u0e01")
520+
self.assertEqual(remove_dangling("\u0e48\u0e49\u0e01"), "\u0e01")
521+
self.assertEqual(remove_dangling("\u0e48\u0e01\u0e48"), "\u0e01\u0e48")
522+
523+
# remove duplicate spaces
524+
self.assertEqual(remove_dup_spaces(" ab c d "), "ab c d")
525+
self.assertEqual(remove_dup_spaces("\nab c \n d \n"), "ab c\nd")
526+
527+
# remove tone marks
509528
self.assertEqual(remove_tonemark("จิ้น"), "จิน")
510529
self.assertEqual(remove_tonemark("เก๋า"), "เกา")
511530

0 commit comments

Comments
 (0)