Skip to content

Commit 8d47f2d

Browse files
committed
rebasing
Signed-off-by: tbartley94 <tbartley@nvidia.com>
1 parent b0a57e0 commit 8d47f2d

File tree

4 files changed

+65
-27
lines changed

4 files changed

+65
-27
lines changed

nemo_text_processing/inverse_text_normalization/he/taggers/cardinal.py

Lines changed: 31 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -21,13 +21,16 @@
2121
delete_and,
2222
delete_optional_and,
2323
)
24-
from nemo_text_processing.inverse_text_normalization.he.utils import get_abs_path
24+
from nemo_text_processing.inverse_text_normalization.he.utils import (
25+
get_abs_path,
26+
)
27+
2528
from nemo_text_processing.text_normalization.en.graph_utils import (
2629
NEMO_DIGIT,
2730
NEMO_SIGMA,
2831
NEMO_SPACE,
29-
delete_space,
3032
insert_space,
33+
delete_space,
3134
)
3235
from nemo_text_processing.text_normalization.en.utils import load_labels
3336

@@ -49,10 +52,15 @@ def __init__(self):
4952
# teens
5053
graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
5154
graph_ties = pynini.string_file(get_abs_path("data/numbers/ties.tsv"))
52-
graph_ties += pynini.union(delete_space + delete_and + graph_digit, pynutil.insert("0", weight=0.001))
55+
graph_ties += pynini.union(
56+
delete_space + delete_and + graph_digit,
57+
pynutil.insert("0", weight=0.001),
58+
)
5359

5460
graph_two_digit = pynini.union(graph_teen, graph_ties)
55-
self.graph_two_digit = graph_two_digit | graph_digit
61+
self.graph_two_digit = pynini.union(
62+
graph_digit, graph_ties, pynutil.add_weight(graph_teen, -0.001)
63+
)
5664

5765
# hundreds
5866
hundred = pynini.string_map([("מאה", "1"), ("מאתיים", "2")])
@@ -69,7 +77,9 @@ def __init__(self):
6977
pynutil.insert("00", weight=0.001),
7078
)
7179
graph_hundred = pynini.union(
72-
graph_hundred, pynutil.insert("0") + graph_two_digit, pynutil.insert("00") + graph_digit
80+
graph_hundred,
81+
pynutil.insert("0") + graph_two_digit,
82+
pynutil.insert("00") + graph_digit,
7383
)
7484

7585
self.graph_hundred = graph_hundred @ (
@@ -82,12 +92,16 @@ def __init__(self):
8292
delete_thousand = pynutil.delete("אלפים") | pynutil.delete("אלף", weight=0.001)
8393

8494
large_number_prefix = pynini.union(
85-
graph_hundred, pynutil.insert("0") + graph_two_digit, pynutil.insert("00") + thousand_digit
95+
graph_hundred,
96+
pynutil.insert("0") + graph_two_digit,
97+
pynutil.insert("00") + thousand_digit,
8698
)
8799
many_thousands = large_number_prefix + delete_space + delete_thousand
88100

89101
graph_thousands = delete_optional_and + pynini.union(
90-
(pynutil.insert("00") + thousand), many_thousands, pynutil.insert("000", weight=0.001)
102+
(pynutil.insert("00") + thousand),
103+
many_thousands,
104+
pynutil.insert("000", weight=0.001),
91105
)
92106

93107
self.graph_thousands = pynini.union(graph_thousands + delete_space + graph_hundred, graph_zero)
@@ -105,11 +119,19 @@ def __init__(self):
105119
graph_millions = pynini.union(many_millions, million, pynutil.insert("000", weight=0.001))
106120

107121
graph = pynini.union(
108-
graph_millions + delete_space + graph_thousands + delete_space + graph_hundred, graph_zero
122+
graph_millions
123+
+ delete_space
124+
+ graph_thousands
125+
+ delete_space
126+
+ graph_hundred,
127+
graph_zero,
109128
)
110129

111130
graph = graph @ pynini.union(
112-
pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT), "0"
131+
pynutil.delete(pynini.closure("0"))
132+
+ pynini.difference(NEMO_DIGIT, "0")
133+
+ pynini.closure(NEMO_DIGIT),
134+
"0",
113135
)
114136

115137
labels_exception = load_labels(get_abs_path("data/numbers/digit.tsv"))

nemo_text_processing/inverse_text_normalization/he/taggers/measure.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ def __init__(self, cardinal: CardinalFst, decimal: DecimalFst):
9191
spaced_units = pynini.string_file(get_abs_path("data/spaced_measurements.tsv"))
9292
spaced_units = pynini.invert(spaced_units)
9393
spaced_units = (
94-
pynutil.insert('units: "\[SPACE\]') + spaced_units + pynutil.insert('"'). # noqa: W605
94+
pynutil.insert('units: "\[SPACE\]') + spaced_units + pynutil.insert('"') # noqa: W605
9595
)
9696

9797
# in joint units the unit is concatenated to the number, in spaced unit separate the unit with a space

nemo_text_processing/inverse_text_normalization/he/verbalizers/date.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from nemo_text_processing.inverse_text_normalization.he.graph_utils import GraphFst
1919
from nemo_text_processing.text_normalization.en.graph_utils import (
2020
NEMO_NOT_QUOTE,
21+
NEMO_SPACE,
2122
delete_space,
2223
delete_zero_or_one_space,
2324
insert_space,
@@ -101,14 +102,14 @@ def __init__(self):
101102

102103
# day month and year
103104
graph_dmy = (
104-
graph_dm + delete_space + pynutil.insert('.') + pynini.closure(delete_zero_or_one_space + year, 0, 1)
105+
graph_dm + delete_space + pynutil.insert('.') + delete_zero_or_one_space + year
105106
)
106107

107108
# only month and year
108109
graph_my = (
109110
pynini.closure(month_prefix + delete_zero_or_one_space, 0, 1)
110111
+ month
111-
+ pynutil.insert(' ')
112+
+ pynutil.insert(NEMO_SPACE)
112113
+ pynini.closure(delete_zero_or_one_space + year, 0, 1)
113114
)
114115

nemo_text_processing/inverse_text_normalization/he/verbalizers/measure.py

Lines changed: 30 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,15 @@
1515
import pynini
1616
from pynini.lib import pynutil
1717

18-
from nemo_text_processing.inverse_text_normalization.he.graph_utils import GraphFst
18+
from nemo_text_processing.inverse_text_normalization.he.graph_utils import (
19+
GraphFst,
20+
)
1921
from nemo_text_processing.text_normalization.en.graph_utils import (
2022
NEMO_CHAR,
2123
NEMO_NOT_QUOTE,
22-
NEMO_SIGMA,
23-
NEMO_SPACE,
2424
delete_space,
25+
NEMO_SPACE,
26+
NEMO_SIGMA,
2527
)
2628

2729

@@ -46,51 +48,64 @@ def __init__(self, decimal: GraphFst, cardinal: GraphFst):
4648
optional_prefix = pynini.closure(
4749
pynutil.delete("morphosyntactic_features:")
4850
+ delete_space
49-
+ pynutil.delete("\"")
51+
+ pynutil.delete('"')
5052
+ pynini.closure(NEMO_NOT_QUOTE, 1)
51-
+ pynutil.insert('-')
52-
+ pynutil.delete("\"")
53+
+ pynutil.insert("-")
54+
+ pynutil.delete('"')
5355
+ delete_space,
5456
0,
5557
1,
5658
)
5759

5860
# Removes the negative attribute and leaves the sign if occurs
5961
optional_sign = pynini.closure(
60-
pynutil.delete("negative:")
62+
pynutil.delete("code_switch:")
6163
+ delete_space
62-
+ pynutil.delete("\"")
64+
+ pynutil.delete('"')
6365
+ pynini.accep("-")
64-
+ pynutil.delete("\"")
66+
+ pynutil.delete('"')
6567
+ delete_space,
6668
0,
6769
1,
6870
)
6971

7072
graph_decimal = (
71-
pynutil.delete("decimal {") + delete_space + decimal.numbers + delete_space + pynutil.delete("}")
73+
pynutil.delete("decimal {")
74+
+ delete_space
75+
+ decimal.numbers
76+
+ delete_space
77+
+ pynutil.delete("}")
7278
)
7379

7480
graph_cardinal = (
75-
pynutil.delete("cardinal {") + delete_space + cardinal.numbers + delete_space + pynutil.delete("}")
81+
pynutil.delete("cardinal {")
82+
+ delete_space
83+
+ cardinal.numbers
84+
+ delete_space
85+
+ pynutil.delete("}")
7686
)
7787

7888
unit = (
7989
pynutil.delete("units:")
8090
+ delete_space
81-
+ pynutil.delete("\"")
91+
+ pynutil.delete('"')
8292
+ pynini.closure(NEMO_CHAR - NEMO_SPACE, 1)
83-
+ pynutil.delete("\"")
93+
+ pynutil.delete('"')
8494
+ delete_space
8595
)
8696
unit @= pynini.cdrewrite(
87-
pynini.cross("\[SPACE\]", NEMO_SPACE), "", "", NEMO_SIGMA
97+
pynini.cross("\[SPACE\]", NEMO_SPACE), "", "", NEMO_SIGMA # noqa: W605
8898
) # For space separated measures.
8999

90100
numbers_units = delete_space + unit
91101
numbers_graph = (graph_cardinal | graph_decimal) + numbers_units
92102

93-
one_graph = delete_space + pynutil.insert("1") + unit + pynutil.delete("cardinal { integer: \"1\" }")
103+
one_graph = (
104+
delete_space
105+
+ pynutil.insert("1")
106+
+ unit
107+
+ pynutil.delete('cardinal { integer: "1" }')
108+
)
94109

95110
graph = optional_prefix + optional_sign + (numbers_graph | one_graph)
96111
delete_tokens = self.delete_tokens(graph)

0 commit comments

Comments
 (0)