4242from camel_tools .disambig .bert ._bert_morph_dataset import MorphDataset
4343from camel_tools .disambig .score_function import score_analysis_uniform
4444from camel_tools .disambig .score_function import FEATURE_SET_MAP
45+ from camel_tools .utils .dediac import dediac_ar
4546
4647
4748_SCORING_FUNCTION_MAP = {
@@ -54,6 +55,17 @@ def _read_json(f_path):
5455 return json .load (f )
5556
5657
def _dediac_sentence(sentence):
    """Return a copy of a tokenized sentence with each word dediacritized.

    Every word is passed through `dediac_ar`. When that call yields an empty
    string, the original word is kept instead, so the returned list always
    has exactly one entry per input word, in the same order.

    Args:
        sentence: An iterable of word strings.

    Returns:
        A new list holding, for each input word, either the output of
        `dediac_ar` or the untouched word when that output is empty.
    """

    def _strip_or_keep(token):
        stripped = dediac_ar(token)
        # Never replace a word with an empty string; fall back to the
        # original token so positions stay aligned with the input.
        return stripped if len(stripped) > 0 else token

    return [_strip_or_keep(token) for token in sentence]
67+
68+
5769class _BERTFeatureTagger :
5870 """A feature tagger based on the fine-tuned BERT architecture.
5971
@@ -521,7 +533,8 @@ def disambiguate(self, sentence):
521533 disambiguated analyses for the given sentence.
522534 """
523535
524- predictions = self ._predict_sentence (sentence )
536+ dediaced_sentence = _dediac_sentence (sentence )
537+ predictions = self ._predict_sentence (dediaced_sentence )
525538
526539 return [self ._disambiguate_word_fn (w , p )
527540 for (w , p ) in zip (sentence , predictions )]
@@ -538,7 +551,8 @@ def disambiguate_sentences(self, sentences):
538551 disambiguated analyses for the given sentences.
539552 """
540553
541- predictions = self ._predict_sentences (sentences )
554+ dediaced_sentences = [_dediac_sentence (s ) for s in sentences ]
555+ predictions = self ._predict_sentences (dediaced_sentences )
542556 disambiguated_sentences = []
543557
544558 for sentence , prediction in zip (sentences , predictions ):
0 commit comments