Skip to content

Commit ec217f3

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 1ff8ec9 commit ec217f3

File tree

5 files changed

+54
-46
lines changed

5 files changed

+54
-46
lines changed

nemo_text_processing/text_normalization/fr/taggers/date.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
import pynini
22
from pynini.lib import pynutil
3-
from nemo_text_processing.text_normalization.fr.utils import get_abs_path
4-
5-
from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, NEMO_DIGIT
63

4+
from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst
5+
from nemo_text_processing.text_normalization.fr.utils import get_abs_path
76

8-
# TODO: add articles? 'le...'
7+
# TODO: add articles? 'le...'
98

109
month_numbers = pynini.string_file(get_abs_path("data/dates/months.tsv"))
1110
eras = pynini.string_file(get_abs_path("data/dates/eras.tsv"))
12-
delete_leading_zero = (pynutil.delete("0") | (NEMO_DIGIT - "0")) + NEMO_DIGIT #reminder, NEMO_DIGIT = filter on digits
11+
delete_leading_zero = (
12+
pynutil.delete("0") | (NEMO_DIGIT - "0")
13+
) + NEMO_DIGIT # reminder, NEMO_DIGIT = filter on digits
14+
1315

1416
class DateFst(GraphFst):
1517
''' Finite state transducer for classifying dates, e.g.:
1618
'02.03.2003' -> date {day: 'deux' month: 'mars' year: 'deux mille trois' preserve_order: true}
1719
'''
20+
1821
def __init__(self, cardinal: GraphFst, deterministic: bool = True):
1922
super().__init__(name="dates", kind="classify")
2023

@@ -23,10 +26,10 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
2326
# 'le' -> 'le', 'les' -> 'les'
2427
le_determiner = pynini.accep("le ") | pynini.accep("les ")
2528
self.optional_le = pynini.closure(le_determiner, 0, 1)
26-
27-
# '01' -> 'un'
29+
30+
# '01' -> 'un'
2831
optional_leading_zero = delete_leading_zero | NEMO_DIGIT
29-
valid_day_number = pynini.union(*[str(x) for x in range(1,32)])
32+
valid_day_number = pynini.union(*[str(x) for x in range(1, 32)])
3033
premier = pynini.string_map([("1", "premier")])
3134
day_number_to_word = premier | cardinal_graph
3235

@@ -59,7 +62,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
5962
)
6063

6164
# Accepts "janvier", "février", etc
62-
month_name_graph = pynutil.insert("month: \"") + month_numbers.project("output") + pynutil.insert("\"")
65+
month_name_graph = pynutil.insert("month: \"") + month_numbers.project("output") + pynutil.insert("\"")
6366

6467
self.fst |= (
6568
pynutil.insert("date { ")
@@ -73,9 +76,8 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
7376
# Accepts "70s", "80s", etc
7477
self.fst |= pynutil.insert("date { year: \"") + eras + pynutil.insert("\" preserve_order: true }")
7578

76-
7779
# Accepts date ranges, "17-18-19 juin" -> date { day: "17" day: "18" day: "19"}
78-
for separator in ["-", "/"]:
80+
for separator in ["-", "/"]:
7981
day_range_graph = (
8082
pynutil.insert("day: \"")
8183
+ pynini.closure(digit_to_day + pynutil.delete(separator) + pynutil.insert(" "), 1)
@@ -95,16 +97,17 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
9597
self.fst = self.fst.optimize()
9698

9799

98-
99100
def apply_fst(text, fst):
100101
try:
101102
output = pynini.shortestpath(text @ fst).string()
102103
print(f"'{text}' --> '{output}'")
103104
except pynini.FstOpError:
104105
print(f"Error: No valid output with given input: '{text}'")
105106

107+
106108
if __name__ == "__main__":
107109
from nemo_text_processing.text_normalization.fr.taggers.cardinal import CardinalFst
110+
108111
fst = DateFst(CardinalFst())
109112

110113
print('DETERMINER')
@@ -132,7 +135,7 @@ def apply_fst(text, fst):
132135
apply_fst("02/03/2003", fst.fst)
133136
apply_fst("02-03-2003", fst.fst)
134137
apply_fst("le 02.03.2003", fst.fst)
135-
138+
136139
apply_fst("02.03", fst.fst)
137140
apply_fst("17 janvier", fst.fst)
138141
apply_fst("10 mars 2023", fst.fst)
@@ -142,4 +145,6 @@ def apply_fst(text, fst):
142145
apply_fst("80s", fst.fst)
143146

144147
print("\nDATE RANGES")
145-
apply_fst("les 17/18/19 juin", fst.fst) # returns: date { day: "les dix-sept" day: "dix-huit" day: "dix-neuf" month: "juin" preserve_order: true }
148+
apply_fst(
149+
"les 17/18/19 juin", fst.fst
150+
) # returns: date { day: "les dix-sept" day: "dix-huit" day: "dix-neuf" month: "juin" preserve_order: true }

nemo_text_processing/text_normalization/fr/taggers/tokenize_and_classify.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@
2626
)
2727
from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst
2828
from nemo_text_processing.text_normalization.fr.taggers.cardinal import CardinalFst
29+
from nemo_text_processing.text_normalization.fr.taggers.date import DateFst
2930
from nemo_text_processing.text_normalization.fr.taggers.decimals import DecimalFst
3031
from nemo_text_processing.text_normalization.fr.taggers.fraction import FractionFst
3132
from nemo_text_processing.text_normalization.fr.taggers.ordinal import OrdinalFst
3233
from nemo_text_processing.text_normalization.fr.taggers.whitelist import WhiteListFst
3334
from nemo_text_processing.text_normalization.fr.taggers.word import WordFst
34-
from nemo_text_processing.text_normalization.fr.taggers.date import DateFst
3535
from nemo_text_processing.utils.logging import logger
3636

3737

@@ -86,7 +86,7 @@ def __init__(
8686
self.whitelist = WhiteListFst(input_case=input_case, deterministic=deterministic, input_file=whitelist)
8787
whitelist_graph = self.whitelist.fst
8888
punct_graph = PunctuationFst(deterministic=deterministic).fst
89-
89+
9090
self.date = DateFst(self.cardinal, deterministic=deterministic)
9191
date_graph = self.date.fst
9292

nemo_text_processing/text_normalization/fr/verbalizers/date.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@
1919
NEMO_NOT_QUOTE,
2020
NEMO_SPACE,
2121
GraphFst,
22-
delete_preserve_order
22+
delete_preserve_order,
2323
)
2424

25+
2526
class DateFst(GraphFst):
2627
"""
2728
Finite state transducer for verbalizing date, e.g.
@@ -50,13 +51,15 @@ def __init__(self, deterministic: bool = True):
5051
delete_tokens = self.delete_tokens(self.graph)
5152
self.fst = delete_tokens.optimize()
5253

54+
5355
def apply_fst(text, fst):
5456
try:
5557
output = pynini.shortestpath(text @ fst).string()
5658
print(f"'{text}' --> '{output}'")
5759
except pynini.FstOpError:
5860
print(f"Error: No valid output with given input: '{text}'")
5961

62+
6063
if __name__ == "__main__":
6164
fst = DateFst()
6265

nemo_text_processing/text_normalization/fr/verbalizers/verbalize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
from nemo_text_processing.text_normalization.en.graph_utils import GraphFst
1515
from nemo_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst
1616
from nemo_text_processing.text_normalization.fr.verbalizers.cardinal import CardinalFst
17+
from nemo_text_processing.text_normalization.fr.verbalizers.date import DateFst
1718
from nemo_text_processing.text_normalization.fr.verbalizers.decimals import DecimalFst
1819
from nemo_text_processing.text_normalization.fr.verbalizers.fraction import FractionFst
1920
from nemo_text_processing.text_normalization.fr.verbalizers.ordinal import OrdinalFst
20-
from nemo_text_processing.text_normalization.fr.verbalizers.date import DateFst
2121

2222

2323
class VerbalizeFst(GraphFst):

nemo_text_processing/text_normalization/fr_tutorial/taggers/my_test_script.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,47 +3,47 @@
33

44
from nemo_text_processing.text_normalization.fr.utils import get_abs_path
55

6+
67
def apply_fst(text, fst):
7-
""" Given a string input, returns the output string
8+
""" Given a string input, returns the output string
89
produced by traversing the path with lowest weight.
910
If no valid path accepts input string, returns an
1011
error.
1112
"""
12-
try:
13-
print(pynini.shortestpath(text @ fst).string())
14-
except pynini.FstOpError:
15-
print(f"Error: No valid output with given input: '{text}'")
16-
17-
zero = pynini.string_map([("zéro","0")]) # French only pronounces zeroes as stand alone
18-
digits_map = pynini.string_map([ # pynini function that creates explicit input-output mappings for a WFST
19-
("un","1"),
20-
("une","1"),
21-
("deux","2"),
22-
("trois","3"),
23-
("quatre","4"),
24-
("cinq","5"),
25-
("six","6"),
26-
("sept","7"),
27-
("huit","8"),
28-
("neuf","9")
29-
])
13+
try:
14+
print(pynini.shortestpath(text @ fst).string())
15+
except pynini.FstOpError:
16+
print(f"Error: No valid output with given input: '{text}'")
17+
18+
19+
zero = pynini.string_map([("zéro", "0")]) # French only pronounces zeroes as stand alone
20+
digits_map = pynini.string_map(
21+
[ # pynini function that creates explicit input-output mappings for a WFST
22+
("un", "1"),
23+
("une", "1"),
24+
("deux", "2"),
25+
("trois", "3"),
26+
("quatre", "4"),
27+
("cinq", "5"),
28+
("six", "6"),
29+
("sept", "7"),
30+
("huit", "8"),
31+
("neuf", "9"),
32+
]
33+
)
3034

3135
digits = pynini.string_file("data/numbers/digits.tsv")
3236

33-
teens = pynini.string_map([
34-
("onze", "11"),
35-
("douze", "12"),
36-
("treize", "13"),
37-
("quatorze", "14"),
38-
("quinze", "16"),
39-
])
37+
teens = pynini.string_map([("onze", "11"), ("douze", "12"), ("treize", "13"), ("quatorze", "14"), ("quinze", "16"),])
4038

4139
tens = pynini.string_map([("dix", "1")])
42-
delete_hyphen = pynini.closure(pynutil.delete("-"), 0, 1) # Applies a closure from 0-1 of operation. Equivalent to regex /?/
40+
delete_hyphen = pynini.closure(
41+
pynutil.delete("-"), 0, 1
42+
) # Applies a closure from 0-1 of operation. Equivalent to regex /?/
4343

4444
graph_tens = tens + delete_hyphen + digits
4545
graph_tens_and_teens = graph_tens | teens
4646

4747
graph_digits = digits | pynutil.insert("0")
4848

49-
apply_fst("un", graph_tens_and_teens)
49+
apply_fst("un", graph_tens_and_teens)

0 commit comments

Comments
 (0)