Skip to content

Commit 3a54e71

Browse files
authored
Merge pull request #14 from wannaphong/copilot/port-bahttext-to-thainlp-net
Port number-to-Thai-word conversion from PyThaiNLP
2 parents 6bbdb13 + c427b0c commit 3a54e71

File tree

3 files changed

+239
-0
lines changed

3 files changed

+239
-0
lines changed

README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ Thai NLP in .NET
1010
### Subword Tokenization
1111
- **TCC** (Thai Character Cluster) tokenization for breaking text into character clusters
1212

13+
### Number to Thai Word Conversion
14+
- **NumToThaiWord** - Convert numbers to Thai text representation
15+
- **BahtText** - Convert numbers to Thai currency format (Baht and Satang)
16+
1317
## Installation
1418

1519
### From NuGet (Recommended)
@@ -91,6 +95,26 @@ var clusters = Subword.tcc("ประเทศไทย");
9195
var positions = Subword.tcc_pos("ประเทศไทย");
9296
```
9397

98+
### Number to Thai Word Conversion
99+
100+
```csharp
101+
using Thainlp;
102+
103+
// Convert number to Thai words
104+
string text = NumToWord.NumToThaiWord(112);
105+
// Output: หนึ่งร้อยสิบสอง
106+
107+
string negative = NumToWord.NumToThaiWord(-273);
108+
// Output: ลบสองร้อยเจ็ดสิบสาม
109+
110+
// Convert to Thai Baht currency format
111+
string baht = NumToWord.BahtText(5611116.50);
112+
// Output: ห้าล้านหกแสนหนึ่งหมื่นหนึ่งพันหนึ่งร้อยสิบหกบาทห้าสิบสตางค์
113+
114+
string simple = NumToWord.BahtText(116);
115+
// Output: หนึ่งร้อยสิบหกบาทถ้วน
116+
```
117+
94118
## API Compatibility with PyThaiNLP
95119

96120
This library provides an API similar to PyThaiNLP:
@@ -101,6 +125,8 @@ This library provides an API similar to PyThaiNLP:
101125
| `word_tokenize(text, engine="newmm")` | `WordTokenizer.WordTokenize(text, engine: "newmm")` |
102126
| `word_tokenize(text, custom_dict=trie)` | `WordTokenizer.WordTokenize(text, customDict: trie)` |
103127
| `word_tokenize(text, keep_whitespace=False)` | `WordTokenizer.WordTokenize(text, keepWhitespace: false)` |
128+
| `num_to_thaiword(number)` | `NumToWord.NumToThaiWord(number)` |
129+
| `bahttext(number)` | `NumToWord.BahtText(number)` |
104130

105131
## Testing
106132

ThaiNLPTest/NumToWordTest.cs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
using Microsoft.VisualStudio.TestTools.UnitTesting;
2+
using Thainlp;
3+
4+
namespace ThaiNLPTest
5+
{
6+
/// <summary>
/// Unit tests for <see cref="NumToWord"/>: Thai currency text and plain
/// number-to-Thai-word conversion.
/// </summary>
[TestClass]
public class NumToWordTest
{
    /// <summary>
    /// BahtText renders whole and fractional amounts, zero, and null.
    /// </summary>
    [TestMethod]
    public void TestBahtText()
    {
        // Inputs and their expected renderings, checked pairwise in order.
        double?[] amounts = { 5611116.50, 116, 0, null };
        string[] expectedTexts =
        {
            "ห้าล้านหกแสนหนึ่งหมื่นหนึ่งพันหนึ่งร้อยสิบหกบาทห้าสิบสตางค์",
            "หนึ่งร้อยสิบหกบาทถ้วน",
            "ศูนย์บาทถ้วน",
            ""
        };

        for (int i = 0; i < amounts.Length; i++)
        {
            string actual = NumToWord.BahtText(amounts[i]);
            Assert.AreEqual(expectedTexts[i], actual);
        }
    }

    /// <summary>
    /// NumToThaiWord renders null, zero, a positive and a negative integer.
    /// </summary>
    [TestMethod]
    public void TestNumToThaiWord()
    {
        // int? keeps the call bound to the same overload a bare null/int literal selects.
        int?[] numbers = { null, 0, 112, -273 };
        string[] expectedWords =
        {
            "",
            "ศูนย์",
            "หนึ่งร้อยสิบสอง",
            "ลบสองร้อยเจ็ดสิบสาม"
        };

        for (int i = 0; i < numbers.Length; i++)
        {
            string actual = NumToWord.NumToThaiWord(numbers[i]);
            Assert.AreEqual(expectedWords[i], actual);
        }
    }
}
30+
}

thainlp/NumToWord.cs

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace Thainlp
6+
{
7+
/// <summary>
/// Converts numeric values to their Thai read-out.
/// Adapted from PyThaiNLP's num_to_thaiword implementation.
/// </summary>
public class NumToWord
{
    // All number <-> string round trips inside this class are machine-internal,
    // so they must not depend on the caller's current culture (decimal comma,
    // localized minus sign, ...). Fully qualified to avoid needing a new using.
    private static readonly IFormatProvider _INVARIANT =
        System.Globalization.CultureInfo.InvariantCulture;

    // Thai words for the digits 0-9; index 0 is empty because a zero digit is silent.
    private static readonly string[] _VALUES = new[]
    {
        "",
        "หนึ่ง",
        "สอง",
        "สาม",
        "สี่",
        "ห้า",
        "หก",
        "เจ็ด",
        "แปด",
        "เก้า"
    };

    // Thai place-value words for 10^0 .. 10^6; index 6 ("ล้าน") is reused for every further group of six digits.
    private static readonly string[] _PLACES = new[]
    {
        "",
        "สิบ",
        "ร้อย",
        "พัน",
        "หมื่น",
        "แสน",
        "ล้าน"
    };

    // Irregular readings applied as textual rewrites after the regular digit-by-digit read-out.
    // Kept as an ordered array so the application order is deterministic
    // (Dictionary enumeration order is not contractually defined).
    private static readonly KeyValuePair<string, string>[] _EXCEPTIONS = new[]
    {
        new KeyValuePair<string, string>("หนึ่งสิบ", "สิบ"),
        new KeyValuePair<string, string>("สองสิบ", "ยี่สิบ"),
        new KeyValuePair<string, string>("สิบหนึ่ง", "สิบเอ็ด")
    };

    /// <summary>
    /// Converts a number to Thai text and adds the suffix "บาท" (Baht).
    /// The precision is fixed at two decimal places (0.00) to fit
    /// the "สตางค์" (Satang) unit; a zero Satang part is rendered as "ถ้วน".
    /// This function works similarly to the BAHTTEXT function in Microsoft Excel.
    /// The result does not depend on the current culture.
    /// </summary>
    /// <param name="number">Number to be converted into Thai Baht currency format</param>
    /// <returns>
    /// Text representing the amount of money in the format of Thai currency,
    /// or an empty string when <paramref name="number"/> is null.
    /// </returns>
    /// <exception cref="FormatException">The value is NaN or infinite.</exception>
    /// <exception cref="OverflowException">The whole-Baht part does not fit in a 64-bit signed integer.</exception>
    /// <example>
    /// <code>
    /// NumToWord.BahtText(1)
    /// // output: หนึ่งบาทถ้วน
    ///
    /// NumToWord.BahtText(21)
    /// // output: ยี่สิบเอ็ดบาทถ้วน
    ///
    /// NumToWord.BahtText(200)
    /// // output: สองร้อยบาทถ้วน
    /// </code>
    /// </example>
    public static string BahtText(double? number)
    {
        if (number == null)
        {
            return "";
        }

        double value = number.Value;
        if (value == 0)
        {
            return "ศูนย์บาทถ้วน";
        }

        // Round to two decimals via formatting, then split into Baht / Satang.
        // Invariant culture guarantees '.' as the separator and '-' as the sign.
        string formatted = value.ToString("F2", _INVARIANT);
        string[] parts = formatted.Split('.');

        // Use long to handle larger numbers
        long numInt = long.Parse(parts[0], _INVARIANT);
        int numDec = int.Parse(parts[1], _INVARIANT);

        var result = new StringBuilder();

        // "-0" parses to 0, so the sign of amounts in (-1, 0) would otherwise be lost;
        // for |value| >= 1 the sign is already produced by NumToThaiWord(numInt).
        if (value < 0 && numInt == 0 && numDec != 0)
        {
            result.Append("ลบ");
        }

        result.Append(NumToThaiWord(numInt)).Append("บาท");

        if (numDec != 0)
        {
            result.Append(NumToThaiWord(numDec)).Append("สตางค์");
        }
        else
        {
            result.Append("ถ้วน");
        }

        return result.ToString();
    }

    /// <summary>
    /// Converts an integer to Thai text.
    /// </summary>
    /// <param name="number">An integer number to be converted to Thai text</param>
    /// <returns>
    /// Text representing the number in Thai, or an empty string when
    /// <paramref name="number"/> is null.
    /// </returns>
    /// <example>
    /// <code>
    /// NumToWord.NumToThaiWord(1)
    /// // output: หนึ่ง
    ///
    /// NumToWord.NumToThaiWord(11)
    /// // output: สิบเอ็ด
    /// </code>
    /// </example>
    public static string NumToThaiWord(int? number)
    {
        if (number == null)
        {
            return "";
        }
        return NumToThaiWord((long)number.Value);
    }

    /// <summary>
    /// Converts a 64-bit integer to Thai text. Negative values are prefixed
    /// with "ลบ"; every value in the <see cref="long"/> range is supported,
    /// including <see cref="long.MinValue"/>.
    /// </summary>
    /// <param name="number">A long number to be converted to Thai text</param>
    /// <returns>
    /// Text representing the number in Thai, "ศูนย์" for zero, or an empty
    /// string when <paramref name="number"/> is null.
    /// </returns>
    public static string NumToThaiWord(long? number)
    {
        if (number == null)
        {
            return "";
        }

        long value = number.Value;
        if (value == 0)
        {
            return "ศูนย์";
        }

        bool isNegative = value < 0;

        // Math.Abs(long.MinValue) overflows, so take the magnitude as ulong:
        // -(value + 1) always fits in a long, then add the 1 back unsigned.
        ulong magnitude = isNegative ? (ulong)(-(value + 1)) + 1UL : (ulong)value;
        string digits = magnitude.ToString(_INVARIANT);

        var output = new StringBuilder();

        // Walk from the least significant digit, prepending each digit's reading.
        for (int place = 0; place < digits.Length; place++)
        {
            // Every completed group of six digits is scaled by another "ล้าน" (million).
            if (place % 6 == 0 && place > 0)
            {
                output.Insert(0, _PLACES[6]);
            }

            int digit = digits[digits.Length - 1 - place] - '0';
            if (digit != 0)
            {
                output.Insert(0, _PLACES[place % 6]);
                output.Insert(0, _VALUES[digit]);
            }
        }

        string result = output.ToString();

        // Rewrite the regular read-out into the irregular Thai forms (10, 20-29, x1).
        foreach (var exception in _EXCEPTIONS)
        {
            result = result.Replace(exception.Key, exception.Value);
        }

        if (isNegative)
        {
            result = "ลบ" + result;
        }

        return result;
    }
}
183+
}

0 commit comments

Comments
 (0)