Skip to content

Commit 397fcc8

Browse files
committed
Working on the yyyy format for the date class
Signed-off-by: Namrata Gachchi <[email protected]>
1 parent a8d2025 commit 397fcc8

File tree

3 files changed, with 26 additions and 9 deletions.

nemo_text_processing/text_normalization/hi/data/numbers/teens_and_ties.tsv

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -79,12 +79,12 @@
7979
८८ अट्ठासी
8080
८९ नवासी
8181
९० नब्बे
82-
९१ इक्यानबे
83-
९२ बानबे
84-
९३ तिरानबे
85-
९४ चौरानबे
86-
९५ पंचानबे
87-
९६ छियानबे
88-
९७ सत्तानबे
89-
९८ अट्ठानबे
82+
९१ इक्यानबे
83+
९२ बानबे
84+
९३ तिरानबे
85+
९४ चौरानबे
86+
९५ पंचानबे
87+
९६ छियानबे
88+
९७ सत्तानबे
89+
९८ अट्ठानबे
9090
९९ निन्यानबे

nemo_text_processing/text_normalization/hi/taggers/date.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,9 @@
2727
days = pynini.string_file(get_abs_path("data/date/days.tsv"))
2828
months = pynini.string_file(get_abs_path("data/date/months.tsv"))
2929
year_suffix = pynini.string_file(get_abs_path("data/date/year_suffix.tsv"))
30+
digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
31+
teens_ties = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv"))
32+
teens_and_ties = pynutil.add_weight(teens_ties, -0.1)
3033

3134

3235
class DateFst(GraphFst):
@@ -52,6 +55,14 @@ def __init__(self, cardinal: GraphFst):
5255
(NEMO_HI_DIGIT + NEMO_HI_NON_ZERO + NEMO_HI_DIGIT + NEMO_HI_DIGIT), cardinal.graph_hundreds_as_thousand
5356
)
5457

58+
cardinal_graph = (
59+
digit
60+
| teens_and_ties
61+
| cardinal.graph_hundreds
62+
| graph_year_thousands
63+
| graph_year_hundreds_as_thousands
64+
)
65+
5566
graph_year = graph_year_thousands | graph_year_hundreds_as_thousands
5667

5768
delete_dash = pynutil.delete("-")
@@ -75,7 +86,7 @@ def __init__(self, cardinal: GraphFst):
7586
range_graph = pynini.cross("-", "से")
7687

7788
# Graph for century
78-
century_number = pynini.compose(pynini.closure(NEMO_HI_DIGIT, 1), cardinal.final_graph) + pynini.accep("वीं")
89+
century_number = pynini.compose(pynini.closure(NEMO_HI_DIGIT, 1), cardinal_graph) + pynini.accep("वीं")
7990
century_text = pynutil.insert("text: \"") + century_number + pynutil.insert("\"") + insert_space
8091

8192
graph_dd_mm_yyyy = (
@@ -86,6 +97,8 @@ def __init__(self, cardinal: GraphFst):
8697
months_graph + (delete_dash | delete_slash) + days_graph + (delete_dash | delete_slash) + years_graph
8798
)
8899

100+
graph_yyyy = pynutil.insert("text: \"") + pynini.compose(pynini.closure(NEMO_HI_DIGIT, 1), cardinal_graph) + pynutil.insert("\"") + insert_space + pynutil.insert(" preserve_order: true ")
101+
89102
graph_mm_dd_yyyy += pynutil.insert(" preserve_order: true ")
90103

91104
graph_mm_yyyy = months_graph + delete_dash + insert_space + years_graph
@@ -114,6 +127,7 @@ def __init__(self, cardinal: GraphFst):
114127
| pynutil.add_weight(graph_year_suffix, -0.001)
115128
| pynutil.add_weight(graph_range, -0.005)
116129
| pynutil.add_weight(century_text, -0.001)
130+
| pynutil.add_weight(graph_yyyy, -0.01)
117131
)
118132

119133
self.final_graph = final_graph.optimize()

tests/nemo_text_processing/hi/data_text_normalization/test_cases_date.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,3 +22,6 @@
2222
३२७वीं सदी~तीन सौ सत्ताईसवीं सदी
2323
१८वीं शताब्दी~अठारहवीं शताब्दी
2424
१९वीं दशक~उन्नीसवीं दशक
25+
१८२३ में~अठारह सौ तेईस में
26+
१९९२ का दशक~उन्नीस सौ बानबे का दशक
27+
१९३२ शताब्दी~उन्नीस सौ बत्तीस शताब्दी

0 commit comments

Comments
 (0)