|
4 | 4 | from scipy.optimize import linear_sum_assignment |
5 | 5 | from scipy.spatial import distance as dis |
6 | 6 | from signwriting.formats.fsw_to_sign import fsw_to_sign |
| 7 | +from signwriting.formats.swu_to_fsw import swu2fsw |
7 | 8 | from signwriting.tokenizer import normalize_signwriting |
8 | 9 | from signwriting.types import Sign, SignSymbol |
9 | 10 |
|
@@ -98,17 +99,22 @@ def error_rate(self, hyp: Sign, ref: Sign) -> float: |
98 | 99 | return length_weight + mean_cost * (1 - length_weight) |
99 | 100 |
|
def score_single_sign(self, hypothesis: str, reference: str) -> float:
    """Score a single hypothesis sign against a single reference sign.

    Both strings are parsed with ``fsw_to_sign``; the score is
    ``(1 - error_rate) ** 2``, so a lower error rate between the two parsed
    signs yields a higher score.

    Args:
        hypothesis: The predicted sign, as a string accepted by ``fsw_to_sign``.
        reference: The gold sign, as a string accepted by ``fsw_to_sign``.

    Returns:
        The squared complement of ``self.error_rate(hyp, ref)``.
    """
    # NOTE: no debug printing here - this runs for every sign pair being
    # scored and must not write to stdout.
    hyp = fsw_to_sign(hypothesis)
    ref = fsw_to_sign(reference)
    return pow(1 - self.error_rate(hyp, ref), 2)
105 | 107 |
|
def _text_to_signs(self, text: str) -> list[str]:
    """Turn a SignWriting text into a list of per-sign strings.

    The text is first passed through ``swu2fsw`` (which converts SWU symbols
    to FSW while keeping any FSW symbols already present), then through
    ``normalize_signwriting``, and finally split on single spaces.

    Args:
        text: SignWriting text in SWU and/or FSW notation.

    Returns:
        The space-separated pieces of the normalized FSW text.
    """
    normalized = normalize_signwriting(swu2fsw(text))
    return normalized.split(" ")
| 111 | + |
106 | 112 | def score(self, hypothesis: str, reference: str) -> float: |
107 | 113 | # Here, hypothesis and reference are both FSW strings of potentially different number of signs |
108 | | - hypothesis_signs = normalize_signwriting(hypothesis).split(" ") |
109 | | - reference_signs = normalize_signwriting(reference).split(" ") |
| 114 | + hypothesis_signs = self._text_to_signs(hypothesis) |
| 115 | + reference_signs = self._text_to_signs(reference) |
110 | 116 | if len(hypothesis_signs) == 1 and len(reference_signs) == 1: |
111 | | - return self.score_single_sign(hypothesis, reference) |
| 117 | + return self.score_single_sign(hypothesis_signs[0], reference_signs[0]) |
112 | 118 |
|
113 | 119 | # Pad with empty strings to make sure the number of signs is the same |
114 | 120 | if len(hypothesis_signs) != len(reference_signs): |
|
0 commit comments