Skip to content

Commit 7cdf31b

Browse files
committed
feat(similarity): support swu signs
1 parent ae203ff commit 7cdf31b

File tree

3 files changed

+18
-4
lines changed

3 files changed

+18
-4
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
.idea/
22
build/
33
signwriting_evaluation.egg-info/
4-
**/__pycache__/
4+
**/__pycache__/
5+
.env

signwriting_evaluation/metrics/similarity.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from scipy.optimize import linear_sum_assignment
55
from scipy.spatial import distance as dis
66
from signwriting.formats.fsw_to_sign import fsw_to_sign
7+
from signwriting.formats.swu_to_fsw import swu2fsw
78
from signwriting.tokenizer import normalize_signwriting
89
from signwriting.types import Sign, SignSymbol
910

@@ -98,17 +99,22 @@ def error_rate(self, hyp: Sign, ref: Sign) -> float:
9899
return length_weight + mean_cost * (1 - length_weight)
99100

100101
def score_single_sign(self, hypothesis: str, reference: str) -> float:
    """Score one hypothesis sign against one reference sign.

    Both strings are parsed with ``fsw_to_sign`` and compared through
    ``self.error_rate``.

    Args:
        hypothesis: FSW string of the sign being evaluated.
        reference: FSW string of the sign to compare against.

    Returns:
        ``(1 - error_rate) ** 2`` for the parsed pair.
    """
    hyp = fsw_to_sign(hypothesis)
    ref = fsw_to_sign(reference)
    return pow(1 - self.error_rate(hyp, ref), 2)
105107

108+
def _text_to_signs(self, text: str) -> list[str]:
    """Split SignWriting text into a list of per-sign FSW strings.

    SWU symbols are converted to FSW first (FSW symbols already present
    are kept), then the text is normalized and split on single spaces.
    """
    fsw_text = swu2fsw(text)
    normalized = normalize_signwriting(fsw_text)
    return normalized.split(" ")
111+
106112
def score(self, hypothesis: str, reference: str) -> float:
107113
# Here, hypothesis and reference are FSW or SWU strings, each potentially containing a different number of signs
108-
hypothesis_signs = normalize_signwriting(hypothesis).split(" ")
109-
reference_signs = normalize_signwriting(reference).split(" ")
114+
hypothesis_signs = self._text_to_signs(hypothesis)
115+
reference_signs = self._text_to_signs(reference)
110116
if len(hypothesis_signs) == 1 and len(reference_signs) == 1:
111-
return self.score_single_sign(hypothesis, reference)
117+
return self.score_single_sign(hypothesis_signs[0], reference_signs[0])
112118

113119
# Pad with empty strings to make sure the number of signs is the same
114120
if len(hypothesis_signs) != len(reference_signs):

signwriting_evaluation/metrics/test_similarity.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ def test_bad_fsw_equals_0(self):
5858
self.assertIsInstance(score, float)
5959
self.assertAlmostEqual(score, 0)
6060

61+
def test_score_swu(self):
    """Score a pair of SWU-encoded strings and check the expected value."""
    swu_hypothesis = "𝠃𝤤𝤬񎱃𝤎𝣠񂇒𝣿𝤀񆕁𝣺𝤐񇆤𝣧𝤗"
    swu_reference = "𝠃𝤙𝤨񎵡𝤃𝣤񎲬𝤃𝣷񂈒𝣽𝤇񇆤𝣳𝤓"
    result = self.metric.score(swu_hypothesis, swu_reference)
    # The metric must return a plain float, not an array or numpy scalar wrapper.
    self.assertIsInstance(result, float)
    self.assertAlmostEqual(result, 0.5509574768254414)
67+
6168

6269
if __name__ == '__main__':
6370
unittest.main()

0 commit comments

Comments
 (0)