Skip to content

Commit 19a1c5a

Browse files
Copilot and wannaphong committed
Address code review feedback
- Remove unused is_valid_numeric_continuation function
- Fix import example in newmm.py docstring

Co-authored-by: wannaphong <8536487+wannaphong@users.noreply.github.com>
1 parent 3b4613f commit 19a1c5a

File tree

2 files changed

+1
-33
lines changed

2 files changed

+1
-33
lines changed

cthainlp/newmm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def segment(
2626
list: List of tokens (strings). Returns empty list if text is None or empty.
2727
2828
Examples:
29-
>>> from cthainlp.tokenize import newmm
29+
>>> from cthainlp import newmm
3030
>>> newmm.segment("ฉันไปโรงเรียน")
3131
['ฉัน', 'ไป', 'โรงเรียน']
3232

src/newmm.c

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,38 +23,6 @@ typedef struct {
2323
int size;
2424
} Graph;
2525

26-
/* Helper: Check if a numeric sequence is valid (e.g., 19.84, 127.0.0.1, 1,984.42) */
27-
static bool is_valid_numeric_continuation(const char* text, int start, int end) {
28-
/* Count dots and commas */
29-
int dot_count = 0;
30-
int comma_count = 0;
31-
int last_special = -1;
32-
33-
for (int i = start; i < end; i++) {
34-
if (text[i] == '.') {
35-
dot_count++;
36-
last_special = i;
37-
} else if (text[i] == ',') {
38-
comma_count++;
39-
last_special = i;
40-
}
41-
}
42-
43-
/* If ends with special char, check if followed by more special chars */
44-
if (last_special == end - 1 && end < (int)strlen(text)) {
45-
char next = text[end];
46-
if (next == '.' || next == ',') {
47-
/* Multiple punctuation in a row - not valid numeric continuation */
48-
return false;
49-
}
50-
}
51-
52-
/* Allow IP addresses (multiple dots with digits) */
53-
/* Allow decimals (one dot) */
54-
/* Allow formatted numbers (commas and at most one dot) */
55-
return true;
56-
}
57-
5826
/* Helper: Check if character is non-Thai */
5927
static bool is_non_thai_char(int codepoint) {
6028
/* Latin letters, digits, spaces */

0 commit comments

Comments
 (0)