Skip to content

Commit cefa248

Browse files
ngachchi and pre-commit-ci[bot]
authored and committed
Future Implementations for classes - Measure, Money, and Date (NVIDIA#258)
* Future Implementations for classes - Measure, Money, and Date Signed-off-by: Namrata Gachchi <[email protected]> * Resolved the conflicts with mm_yyyy and date ranges and added the previously removed failing test cases. Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed the unused empty string implementation Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor fixes for the tagger files Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * reformatted decimal final graph Signed-off-by: Namrata Gachchi <[email protected]> * incorporated the suggestion for decimal graph Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Century implementations Signed-off-by: Namrata Gachchi <[email protected]> * Working on the yyyy format for the date class Signed-off-by: Namrata Gachchi <[email protected]> * reverted yyyy code Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * working on future implementations Signed-off-by: Namrata Gachchi <[email protected]> * working on improving the date class accuracy Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added year prefix for the date class Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * working on the comma cases for date class Signed-off-by: Namrata Gachchi <[email protected]> * minor fixes 
Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * implemented mixed fractions Signed-off-by: Namrata Gachchi <[email protected]> * rectified the test case Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * working on quarterly measurements Signed-off-by: Namrata Gachchi <[email protected]> * reformatted the prefixes and suffixes for date tagger class Signed-off-by: Namrata Gachchi <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * replaced text tag with era tag for the date class Signed-off-by: Namrata Gachchi <[email protected]> * Removed the text tag reference from date class verbalizer Signed-off-by: Namrata Gachchi <[email protected]> --------- Signed-off-by: Namrata Gachchi <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent a595f9c commit cefa248

File tree

4 files changed

+22
-34
lines changed

4 files changed

+22
-34
lines changed

Jenkinsfile

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -11,24 +11,24 @@ pipeline {
1111
}
1212
environment {
1313

14-
AR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-24-24-0'
15-
DE_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-23-24-0'
16-
EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-25-0'
17-
ES_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-24-0'
18-
ES_EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-30-24-0'
19-
FR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-07-25-0'
20-
HU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/07-16-24-0'
21-
PT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
22-
RU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
23-
VI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
24-
SV_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
25-
ZH_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/11-13-24-0'
26-
IT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-22-24-0'
27-
HY_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-0'
28-
MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
29-
JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
30-
HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-22-25-0'
31-
DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
14+
AR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-24-24-0'
15+
DE_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-23-24-0'
16+
EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-04-24-0'
17+
ES_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-25-24-0'
18+
ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-30-24-0'
19+
FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-07-25-0'
20+
HU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-16-24-0'
21+
PT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
22+
RU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
23+
VI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
24+
SV_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
25+
ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-13-24-0'
26+
IT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-22-24-0'
27+
HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
28+
MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
29+
JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
30+
HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/02-12-25-0'
31+
DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
3232
}
3333
stages {
3434

nemo_text_processing/text_normalization/hi/taggers/date.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ def __init__(self, cardinal: GraphFst):
7373

7474
delete_dash = pynutil.delete("-")
7575
delete_slash = pynutil.delete("/")
76+
delete_comma = pynutil.delete(",")
7677

7778
days_graph = pynutil.insert("day: \"") + days + pynutil.insert("\"") + insert_space
7879

nemo_text_processing/text_normalization/hi/taggers/measure.py

Lines changed: 2 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -62,27 +62,15 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
6262
)
6363

6464
# Define the quarterly measurements
65-
quarter = pynini.string_map(
66-
[
67-
(".५", "साढ़े"),
68-
("१.५", "डेढ़"),
69-
("२.५", "ढाई"),
70-
]
71-
)
65+
quarter = pynini.string_map([(".५", "साढ़े"), ("१.५", "डेढ़"), ("२.५", "ढाई"),])
7266
quarter_graph = pynutil.insert("integer_part: \"") + quarter + pynutil.insert("\"")
7367

7468
# Define the unit handling
7569
unit = pynutil.insert(" units: \"") + unit_graph + pynutil.insert("\" ")
7670
units = pynutil.insert(" units: \"") + quarterly_units_graph + pynutil.insert("\" ")
7771

7872
# Handling symbols like x, X, *
79-
symbol_graph = pynini.string_map(
80-
[
81-
("x", "बाई"),
82-
("X", "बाई"),
83-
("*", "बाई"),
84-
]
85-
)
73+
symbol_graph = pynini.string_map([("x", "बाई"), ("X", "बाई"), ("*", "बाई"),])
8674

8775
graph_decimal = (
8876
pynutil.insert("decimal { ")

nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,8 +68,7 @@ def __init__(
6868
os.makedirs(cache_dir, exist_ok=True)
6969
whitelist_file = os.path.basename(whitelist) if whitelist else ""
7070
far_file = os.path.join(
71-
cache_dir,
72-
f"hi_tn_{deterministic}_deterministic_{input_case}_{whitelist_file}_tokenize.far",
71+
cache_dir, f"hi_tn_{deterministic}_deterministic_{input_case}_{whitelist_file}_tokenize.far",
7372
)
7473
if not overwrite_cache and far_file and os.path.exists(far_file):
7574
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]

0 commit comments

Comments
 (0)