Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 50 additions & 16 deletions pythainlp/khavee/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from typing import List, Union

from pythainlp import thai_consonants
from pythainlp.tokenize import subword_tokenize
from pythainlp.util import remove_tonemark, sound_syllable

Expand All @@ -16,6 +17,22 @@ def __init__(self):
KhaveeVerifier: Thai Poetry verifier
"""

def _has_true_final_yl(self, word: str) -> bool:
    """
    Tell whether a trailing ย or ล acts as a true final consonant
    (rather than only being part of the vowel sound written with ไ/ใ)

    The answer is True only when the word ends with ย or ล and holds
    at least two characters found in ``thai_consonants``.

    :param str word: Thai word
    :return: True if ย or ล is a true final consonant
    :rtype: bool
    """
    # Words shorter than two characters are never treated as having a final
    if len(word) < 2:
        return False

    # Only a word-final ย or ล is a candidate
    if word[-1] not in ("ย", "ล"):
        return False

    # Tally every consonant character in the word
    n_consonants = 0
    for ch in word:
        if ch in thai_consonants:
            n_consonants += 1

    # Two or more consonants means the trailing ย/ล is a true final
    return n_consonants >= 2

def check_sara(self, word: str) -> str:
"""
Check the vowels in the Thai word.
Expand Down Expand Up @@ -223,15 +240,35 @@ def check_marttra(self, word: str) -> str:
print(kv.check_marttra("สาว"))
# output: 'เกอว'
"""
if word[-1] == "ร" and word[-2] in ["ต", "ท"]:
word = word[:-1]
# Handle consonant clusters ending with ร
# ตร, ทร → remove ร (treat as final ต/ท sound)
# กร, ขร, คร, ฆร in compound words → remove ร (treat as final ก/ข/ค sound)
# But single syllable words like "กร" should keep ร
if len(word) >= 3 and word[-1] == "ร":
if word[-2] in ["ต", "ท"]:
word = word[:-1]
elif word[-2] in ["ก", "ข", "ค", "ฆ"]:
word = word[:-1]

word = self.handle_karun_sound_silence(word)
word = remove_tonemark(word)

# Check for ำ at the end (represents "am" sound, ends with m)
if word[-1] == "ำ":
return "กม"

# Check for vowels and special patterns that indicate open syllables (กา)
# For words with ไ/ใ, check if ย/ล is a true final or just part of vowel
if "ไ" in word or "ใ" in word:
if word[-1] not in ["ย", "ล"]:
return "กา"
elif not self._has_true_final_yl(word):
# ย/ล is part of the vowel sound, not a true final
return "กา"
# else: ย/ล is a true final, continue to consonant classification below

if (
"ำ" in word
or ("ํ" in word and "า" in word)
or "ไ" in word
or "ใ" in word
("ํ" in word and "า" in word)
):
return "กา"
elif (
Expand All @@ -245,10 +282,9 @@ def check_marttra(self, word: str) -> str:
elif word[-1] in ["ม"]:
return "กม"
elif word[-1] in ["ย"]:
if "ั" in word:
return "กา"
else:
return "เกย"
return "เกย"
elif word[-1] in ["ล"]:
return "เกย"
elif word[-1] in ["ว"]:
return "เกอว"
elif word[-1] in ["ก", "ข", "ค", "ฆ"]:
Expand All @@ -272,7 +308,7 @@ def check_marttra(self, word: str) -> str:
"ส",
]:
return "กด"
elif word[-1] in ["ญ", ", ณ", "น", "ร", "ล", "ฬ"]:
elif word[-1] in ["ญ", "ณ", "น", "ร", "ฬ"]:
return "กน"
elif word[-1] in ["บ", "ป", "พ", "ฟ", "ภ"]:
return "กบ"
Expand Down Expand Up @@ -649,9 +685,7 @@ def handle_karun_sound_silence(self, word: str) -> str:
sound_silenced = word.endswith("์")
if not sound_silenced:
return word
thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ"
locate_silenced = word.rfind("์") - 1
can_silence_two = word[locate_silenced - 2] in thai_consonants
cut_off = 2 if can_silence_two else 1
word = word[: locate_silenced + 1 - cut_off]
# Remove ์ and the silent consonant before it
# การันต์ (์) marks the consonant immediately before it as silent
word = word[:-2]
return word
20 changes: 10 additions & 10 deletions tests/core/test_khavee.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,18 @@ def test_check_marttra(self):
self.assertEqual(kv.check_marttra("จาม"), "กม")
self.assertEqual(kv.check_marttra("ยิ้ม"), "กม")
self.assertEqual(kv.check_marttra("เกม"), "กม")
# self.assertEqual(kv.check_marttra("ขำ"), "กม")
# self.assertEqual(kv.check_marttra("รมย์"), "กม")
self.assertEqual(kv.check_marttra("ขำ"), "กม")
self.assertEqual(kv.check_marttra("รมย์"), "กม")

self.assertEqual(kv.check_marttra("สวย"), "เกย")
self.assertEqual(kv.check_marttra("โปรย"), "เกย")
self.assertEqual(kv.check_marttra("เนย"), "เกย")
self.assertEqual(kv.check_marttra("คอย"), "เกย")
self.assertEqual(kv.check_marttra("ง่าย"), "เกย")
# self.assertEqual(kv.check_marttra("ทัย"), "เกย")
# self.assertEqual(kv.check_marttra("ไทย"), "เกย")
# self.assertEqual(kv.check_marttra("ไกล"), "เกย")
# self.assertEqual(kv.check_marttra("ใกล้"), "เกย")
self.assertEqual(kv.check_marttra("ทัย"), "เกย")
self.assertEqual(kv.check_marttra("ไทย"), "เกย")
self.assertEqual(kv.check_marttra("ไกล"), "เกย")
self.assertEqual(kv.check_marttra("ใกล้"), "เกย")

self.assertEqual(kv.check_marttra("สาว"), "เกอว")
self.assertEqual(kv.check_marttra("นิ้ว"), "เกอว")
Expand All @@ -51,15 +51,15 @@ def test_check_marttra(self):
self.assertEqual(kv.check_marttra("โรค"), "กก")
self.assertEqual(kv.check_marttra("ลาก"), "กก")
self.assertEqual(kv.check_marttra("นัข"), "กก")
# self.assertEqual(kv.check_marttra("จักร"), "กก")
self.assertEqual(kv.check_marttra("จักร"), "กก")

self.assertEqual(kv.check_marttra("จด"), "กด")
self.assertEqual(kv.check_marttra("ตรวจ"), "กด")
self.assertEqual(kv.check_marttra("เสริฐ"), "กด")
self.assertEqual(kv.check_marttra("บุตร"), "กด")
self.assertEqual(kv.check_marttra("ตรุษ"), "กด")
self.assertEqual(kv.check_marttra("มืด"), "กด")
# self.assertEqual(kv.check_marttra("โยชน์"), "กด")
self.assertEqual(kv.check_marttra("โยชน์"), "กด")

self.assertEqual(kv.check_marttra("มึน"), "กน")
self.assertEqual(kv.check_marttra("ร้าน"), "กน")
Expand All @@ -70,8 +70,8 @@ def test_check_marttra(self):
self.assertEqual(kv.check_marttra("บรร"), "กน")
self.assertEqual(kv.check_marttra("กร"), "กน")
self.assertEqual(kv.check_marttra("เณร"), "กน")
# self.assertEqual(kv.check_marttra("ยนต์"), "กน")
# self.assertEqual(kv.check_marttra("กรรณ"), "กน")
self.assertEqual(kv.check_marttra("ยนต์"), "กน")
self.assertEqual(kv.check_marttra("กรรณ"), "กน")

self.assertEqual(kv.check_marttra("ชอบ"), "กบ")
self.assertEqual(kv.check_marttra("ภาพ"), "กบ")
Expand Down
Loading