Skip to content

Commit 028493d

Browse files
committed
Dediac input before disambiguating.
1 parent 06ab076 commit 028493d

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

camel_tools/disambig/bert/unfactored.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
from camel_tools.disambig.bert._bert_morph_dataset import MorphDataset
4343
from camel_tools.disambig.score_function import score_analysis_uniform
4444
from camel_tools.disambig.score_function import FEATURE_SET_MAP
45+
from camel_tools.utils.dediac import dediac_ar
4546

4647

4748
_SCORING_FUNCTION_MAP = {
@@ -54,6 +55,17 @@ def _read_json(f_path):
5455
return json.load(f)
5556

5657

58+
def _dediac_sentence(sentence):
    """Return a copy of *sentence* with each word passed through `dediac_ar`.

    Every input word yields exactly one output word, so the result has the
    same length and order as the input. When `dediac_ar` returns an empty
    string for a word, the original word is kept in that position instead.

    Args:
        sentence: An iterable of word strings.

    Returns:
        A new list of words; the input is not modified.
    """
    # `or` falls back to the untouched token when the processed form is empty.
    return [dediac_ar(token) or token for token in sentence]
67+
68+
5769
class _BERTFeatureTagger:
5870
"""A feature tagger based on the fine-tuned BERT architecture.
5971
@@ -521,7 +533,8 @@ def disambiguate(self, sentence):
521533
disambiguated analyses for the given sentence.
522534
"""
523535

524-
predictions = self._predict_sentence(sentence)
536+
dediaced_sentence = _dediac_sentence(sentence)
537+
predictions = self._predict_sentence(dediaced_sentence)
525538

526539
return [self._disambiguate_word_fn(w, p)
527540
for (w, p) in zip(sentence, predictions)]
@@ -538,7 +551,8 @@ def disambiguate_sentences(self, sentences):
538551
disambiguated analyses for the given sentences.
539552
"""
540553

541-
predictions = self._predict_sentences(sentences)
554+
dediaced_sentences = [_dediac_sentence(s) for s in sentences]
555+
predictions = self._predict_sentences(dediaced_sentences)
542556
disambiguated_sentences = []
543557

544558
for sentence, prediction in zip(sentences, predictions):

0 commit comments

Comments
 (0)