Skip to content

Commit 4861f58

Browse files
committed
Fix date ranges
1 parent fd3e7bf commit 4861f58

File tree

3 files changed

+15
-8
lines changed

3 files changed

+15
-8
lines changed

nemo_text_processing/text_normalization/fr/taggers/date.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,16 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
7676

7777
# Accepts date ranges, "17-18-19 juin" -> date { day: "dix-sept dix-huit dix-neuf" month: "juin" }
7878
for separator in ["-", "/"]:
79+
day_range_graph = (
80+
pynutil.insert("day: \"")
81+
+ pynini.closure(digit_to_day + pynutil.delete(separator) + pynutil.insert(" "), 1)
82+
+ digit_to_day
83+
+ pynutil.insert("\"")
84+
)
85+
7986
self.fst |= (
8087
pynutil.insert("date { ")
81-
+ pynini.closure(self.day_graph + pynutil.delete(separator) + pynutil.insert(" "), 1)
82-
+ self.day_graph
88+
+ day_range_graph
8389
+ pynini.accep(" ")
8490
+ month_name_graph
8591
+ pynini.closure(pynini.accep(" ") + self.year_graph, 0, 1)
@@ -92,7 +98,8 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
9298

9399
def apply_fst(text, fst):
94100
try:
95-
print(text, "-->", pynini.shortestpath(text @ fst).string())
101+
output = pynini.shortestpath(text @ fst).string()
102+
print(f"'{text}' --> '{output}'")
96103
except pynini.FstOpError:
97104
print(f"Error: No valid output with given input: '{text}'")
98105

nemo_text_processing/text_normalization/fr/verbalizers/date.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def __init__(self, deterministic: bool = True):
4141
year = pynutil.delete("year: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
4242
decade = pynutil.delete("decade: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
4343

44-
graph_dmy = pynini.closure(day + NEMO_SPACE, 1) + month + pynini.closure(NEMO_SPACE + year, 0, 1) + delete_preserve_order
44+
graph_dmy = day + NEMO_SPACE + month + pynini.closure(NEMO_SPACE + year, 0, 1) + delete_preserve_order
4545
graph_my = month + NEMO_SPACE + year + delete_preserve_order
4646
graph_decade = decade + delete_preserve_order
4747

@@ -50,15 +50,15 @@ def __init__(self, deterministic: bool = True):
5050
delete_tokens = self.delete_tokens(self.graph)
5151
self.fst = delete_tokens.optimize()
5252

53-
5453
def apply_fst(text, fst):
5554
try:
56-
print(text, "-->", pynini.shortestpath(text @ fst).string())
55+
output = pynini.shortestpath(text @ fst).string()
56+
print(f"'{text}' --> '{output}'")
5757
except pynini.FstOpError:
5858
print(f"Error: No valid output with given input: '{text}'")
5959

6060
if __name__ == "__main__":
6161
fst = DateFst()
6262

6363
# tagger output for "les 17/18/19 juin"
64-
apply_fst('date { day: "les dix-sept" day: "dix-huit" day: "dix-neuf" month: "juin" preserve_order: true }', fst.fst)
64+
apply_fst('date { day: "les dix-sept dix-huit dix-neuf" month: "juin" preserve_order: true }', fst.fst)

tests/nemo_text_processing/fr/data_text_normalization/test_cases_date.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ le 02.03.2003~le deux mars deux mille trois
88
le 10 mars 2023~le dix mars deux mille vingt-trois
99
les 80s~les eighties
1010
les 17/18 juin~les dix-sept dix-huit juin
11-
les 17/18/19 mars~les ldix-sept dix-huit dix-neuf mars
11+
les 17/18/19 mars~les dix-sept dix-huit dix-neuf mars
1212
les 17-18-19 juin~les dix-sept dix-huit dix-neuf juin
1313
les 17-18-19 juin 2025~les dix-sept dix-huit dix-neuf juin deux mille vingt-cinq

0 commit comments

Comments
 (0)