diff --git a/Jenkinsfile b/Jenkinsfile index 3781a171d..f29de1a90 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -13,7 +13,7 @@ pipeline { AR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-24-24-0' DE_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-23-24-0' - EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-04-24-0' + EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-25-0' ES_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-24-0' ES_EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-30-24-0' FR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-07-25-0' diff --git a/nemo_text_processing/text_normalization/en/taggers/electronic.py b/nemo_text_processing/text_normalization/en/taggers/electronic.py index 874d2e437..25c3c445a 100644 --- a/nemo_text_processing/text_normalization/en/taggers/electronic.py +++ b/nemo_text_processing/text_normalization/en/taggers/electronic.py @@ -127,14 +127,15 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True): full_stop_accep = pynini.accep(".") dollar_accep = pynini.accep("$") # Include for the correct transduction of the money graph - excluded_symbols = full_stop_accep | dollar_accep + excluded_symbols = full_stop_accep | dollar_accep | pynini.accep(",") filtered_symbols = pynini.difference(accepted_symbols, excluded_symbols) accepted_characters = NEMO_ALPHA | NEMO_DIGIT | filtered_symbols domain_component = full_stop_accep + pynini.closure(accepted_characters, 2) - graph_domain = ( + graph_domain = pynutil.add_weight( pynutil.insert('domain: "') + (pynini.closure(accepted_characters, 1) + pynini.closure(domain_component, 1)) - + pynutil.insert('"') + + pynutil.insert('"'), + 0.1, ).optimize() graph |= pynutil.add_weight(graph_domain, MIN_NEG_WEIGHT) diff --git a/tests/nemo_text_processing/en/data_text_normalization/test_cases_electronic.txt b/tests/nemo_text_processing/en/data_text_normalization/test_cases_electronic.txt index 3a306158b..498528463 100644 --- a/tests/nemo_text_processing/en/data_text_normalization/test_cases_electronic.txt +++ b/tests/nemo_text_processing/en/data_text_normalization/test_cases_electronic.txt @@ -41,4 +41,5 @@ https://www.nvidia.com/dgx-basepod/~HTTPS colon slash slash WWW dot NVIDIA dot c i can use your card ending in 8876~i can use your card ending in eight eight seven six upgrade/update~upgrade slash update upgrade / update~upgrade slash update -upgrade/update/downgrade~upgrade slash update slash downgrade \ No newline at end of file +upgrade/update/downgrade~upgrade slash update slash downgrade +5.4, or 5.5~five point four, or five point five \ No newline at end of file