Skip to content

Commit 19222cc

Browse files
committed
fix(wer): resolve IndexError when processing single string inputs
1 parent 5cabe3f commit 19222cc

File tree

6 files changed

+276
-79
lines changed

6 files changed

+276
-79
lines changed

werpy/summary.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,41 @@ def summary(reference, hypothesis) -> pd.DataFrame | None:
5555
except (ValueError, AttributeError, ZeroDivisionError) as err:
5656
print(f"{type(err).__name__}: {str(err)}")
5757
return None
58-
if isinstance(word_error_rate_breakdown[0], np.ndarray):
59-
word_error_rate_breakdown = word_error_rate_breakdown.tolist()
58+
59+
b = word_error_rate_breakdown
60+
61+
# Unwrap 0-D container
62+
if isinstance(b, np.ndarray) and b.ndim == 0:
63+
b = b.item()
64+
65+
if isinstance(b, np.ndarray):
66+
if b.ndim == 2:
67+
# True 2-D numeric batch
68+
word_error_rate_breakdown = b.tolist()
69+
70+
elif b.ndim == 1:
71+
# Could be either:
72+
# (a) single example row vector, or
73+
# (b) object array of per-example vectors
74+
first = b[0] if b.size else None
75+
76+
if isinstance(first, (np.ndarray, list, tuple)):
77+
# Batch stored as 1-D object array of per-example vectors (ragged fields exist)
78+
word_error_rate_breakdown = []
79+
for r in b:
80+
rr = r.tolist() if isinstance(r, np.ndarray) else r
81+
word_error_rate_breakdown.append(rr)
82+
else:
83+
# Single example vector - wrap in list for DataFrame
84+
word_error_rate_breakdown = [b.tolist()]
85+
86+
else:
87+
raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
88+
6089
else:
61-
word_error_rate_breakdown = [word_error_rate_breakdown.tolist()]
90+
# Non-numpy fallback (assume [wer, ld, m, ...])
91+
word_error_rate_breakdown = [b.tolist() if hasattr(b, 'tolist') else b]
92+
6293
columns = [
6394
"wer",
6495
"ld",

werpy/summaryp.py

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -69,29 +69,66 @@ def summaryp(
6969
except (ValueError, AttributeError, ZeroDivisionError) as err:
7070
print(f"{type(err).__name__}: {str(err)}")
7171
return None
72-
if isinstance(word_error_rate_breakdown[0], np.ndarray):
73-
word_error_rate_breakdown = word_error_rate_breakdown.tolist()
74-
transform_word_error_rate_breakdown = np.transpose(word_error_rate_breakdown)
75-
weighted_insertions = transform_word_error_rate_breakdown[3] * insertions_weight
76-
weighted_deletions = transform_word_error_rate_breakdown[4] * deletions_weight
77-
weighted_substitutions = (
78-
transform_word_error_rate_breakdown[5] * substitutions_weight
79-
)
80-
m = transform_word_error_rate_breakdown[2]
81-
weighted_errors = sum(
82-
(weighted_insertions, weighted_deletions, weighted_substitutions)
83-
)
84-
werps_result = (weighted_errors / m).tolist()
72+
73+
b = word_error_rate_breakdown
74+
75+
# Unwrap 0-D container
76+
if isinstance(b, np.ndarray) and b.ndim == 0:
77+
b = b.item()
78+
79+
if isinstance(b, np.ndarray):
80+
if b.ndim == 2:
81+
# True 2-D numeric batch
82+
word_error_rate_breakdown = b.tolist()
83+
t = b.T
84+
weighted_insertions = t[3] * insertions_weight
85+
weighted_deletions = t[4] * deletions_weight
86+
weighted_substitutions = t[5] * substitutions_weight
87+
m = t[2]
88+
weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
89+
werps_result = (weighted_errors / m).tolist()
90+
91+
elif b.ndim == 1:
92+
# Could be either:
93+
# (a) single example row vector, or
94+
# (b) object array of per-example vectors
95+
first = b[0] if b.size else None
96+
97+
if isinstance(first, (np.ndarray, list, tuple)):
98+
# Batch stored as 1-D object array of per-example vectors (ragged fields exist)
99+
word_error_rate_breakdown = []
100+
werps_result = []
101+
for r in b:
102+
rr = r.tolist() if isinstance(r, np.ndarray) else r
103+
word_error_rate_breakdown.append(rr)
104+
w_ins = float(rr[3]) * insertions_weight
105+
w_del = float(rr[4]) * deletions_weight
106+
w_sub = float(rr[5]) * substitutions_weight
107+
m_val = float(rr[2])
108+
weighted_wer = (w_ins + w_del + w_sub) / m_val if m_val else 0.0
109+
werps_result.append(weighted_wer)
110+
else:
111+
# Single example vector - wrap in list for DataFrame
112+
word_error_rate_breakdown = [b.tolist()]
113+
weighted_insertions = b[3] * insertions_weight
114+
weighted_deletions = b[4] * deletions_weight
115+
weighted_substitutions = b[5] * substitutions_weight
116+
m = b[2]
117+
weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
118+
werps_result = float(weighted_errors / m) if m else 0.0
119+
120+
else:
121+
raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
122+
85123
else:
86-
word_error_rate_breakdown = [word_error_rate_breakdown.tolist()]
87-
weighted_insertions = word_error_rate_breakdown[0][3] * insertions_weight
88-
weighted_deletions = word_error_rate_breakdown[0][4] * deletions_weight
89-
weighted_substitutions = word_error_rate_breakdown[0][5] * substitutions_weight
90-
m = word_error_rate_breakdown[0][2]
91-
weighted_errors = sum(
92-
(weighted_insertions, weighted_deletions, weighted_substitutions)
93-
)
94-
werps_result = weighted_errors / m
124+
# Non-numpy fallback (assume [wer, ld, m, ...])
125+
word_error_rate_breakdown = [b.tolist() if hasattr(b, 'tolist') else b]
126+
weighted_insertions = b[3] * insertions_weight
127+
weighted_deletions = b[4] * deletions_weight
128+
weighted_substitutions = b[5] * substitutions_weight
129+
m = b[2]
130+
weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
131+
werps_result = float(weighted_errors / m) if m else 0.0
95132

96133
columns = [
97134
"wer",

werpy/wer.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,43 @@ def wer(reference, hypothesis) -> float | np.float64 | None:
5959
except (ValueError, AttributeError, ZeroDivisionError) as err:
6060
print(f"{type(err).__name__}: {str(err)}")
6161
return None
62-
if isinstance(word_error_rate_breakdown[0], np.ndarray):
63-
transform_word_error_rate_breakdown = np.transpose(
64-
word_error_rate_breakdown.tolist()
65-
)
66-
wer_result = (np.sum(transform_word_error_rate_breakdown[1])) / (
67-
np.sum(transform_word_error_rate_breakdown[2])
68-
)
62+
63+
b = word_error_rate_breakdown
64+
65+
# Unwrap 0-D container
66+
if isinstance(b, np.ndarray) and b.ndim == 0:
67+
b = b.item()
68+
69+
if isinstance(b, np.ndarray):
70+
if b.ndim == 2:
71+
# True 2-D numeric batch
72+
t = b.T
73+
wer_result = float(np.sum(t[1]) / np.sum(t[2]))
74+
75+
elif b.ndim == 1:
76+
# Could be either:
77+
# (a) single example row vector, or
78+
# (b) object array of per-example vectors
79+
first = b[0] if b.size else None
80+
81+
if isinstance(first, (np.ndarray, list, tuple)):
82+
# Batch stored as 1-D object array of per-example vectors (ragged fields exist)
83+
total_ld = 0.0
84+
total_m = 0.0
85+
for r in b:
86+
rr = r.tolist() if isinstance(r, np.ndarray) else r
87+
total_ld += float(rr[1])
88+
total_m += float(rr[2])
89+
wer_result = float(total_ld / total_m) if total_m else 0.0
90+
else:
91+
# Single example vector
92+
wer_result = float(b[0])
93+
94+
else:
95+
raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
96+
6997
else:
70-
wer_result = word_error_rate_breakdown[0]
98+
# Non-numpy fallback (assume [wer, ld, m, ...])
99+
wer_result = float(b[0])
100+
71101
return wer_result

werpy/werp.py

Lines changed: 56 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -77,23 +77,61 @@ def werp(
7777
except (ValueError, AttributeError, ZeroDivisionError) as err:
7878
print(f"{type(err).__name__}: {str(err)}")
7979
return None
80-
if isinstance(word_error_rate_breakdown[0], np.ndarray):
81-
transform_word_error_rate_breakdown = np.transpose(
82-
word_error_rate_breakdown.tolist()
83-
)
84-
weighted_insertions = transform_word_error_rate_breakdown[3] * insertions_weight
85-
weighted_deletions = transform_word_error_rate_breakdown[4] * deletions_weight
86-
weighted_substitutions = (
87-
transform_word_error_rate_breakdown[5] * substitutions_weight
88-
)
89-
m = np.sum(transform_word_error_rate_breakdown[2])
80+
81+
b = word_error_rate_breakdown
82+
83+
# Unwrap 0-D container
84+
if isinstance(b, np.ndarray) and b.ndim == 0:
85+
b = b.item()
86+
87+
if isinstance(b, np.ndarray):
88+
if b.ndim == 2:
89+
# True 2-D numeric batch
90+
t = b.T
91+
weighted_insertions = np.sum(t[3]) * insertions_weight
92+
weighted_deletions = np.sum(t[4]) * deletions_weight
93+
weighted_substitutions = np.sum(t[5]) * substitutions_weight
94+
m = np.sum(t[2])
95+
96+
elif b.ndim == 1:
97+
# Could be either:
98+
# (a) single example row vector, or
99+
# (b) object array of per-example vectors
100+
first = b[0] if b.size else None
101+
102+
if isinstance(first, (np.ndarray, list, tuple)):
103+
# Batch stored as 1-D object array of per-example vectors (ragged fields exist)
104+
total_insertions = 0.0
105+
total_deletions = 0.0
106+
total_substitutions = 0.0
107+
total_m = 0.0
108+
for r in b:
109+
rr = r.tolist() if isinstance(r, np.ndarray) else r
110+
total_insertions += float(rr[3])
111+
total_deletions += float(rr[4])
112+
total_substitutions += float(rr[5])
113+
total_m += float(rr[2])
114+
weighted_insertions = total_insertions * insertions_weight
115+
weighted_deletions = total_deletions * deletions_weight
116+
weighted_substitutions = total_substitutions * substitutions_weight
117+
m = total_m
118+
else:
119+
# Single example vector
120+
weighted_insertions = b[3] * insertions_weight
121+
weighted_deletions = b[4] * deletions_weight
122+
weighted_substitutions = b[5] * substitutions_weight
123+
m = b[2]
124+
125+
else:
126+
raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
127+
90128
else:
91-
weighted_insertions = word_error_rate_breakdown[3] * insertions_weight
92-
weighted_deletions = word_error_rate_breakdown[4] * deletions_weight
93-
weighted_substitutions = word_error_rate_breakdown[5] * substitutions_weight
94-
m = np.sum(word_error_rate_breakdown[2])
95-
weighted_errors = np.sum(
96-
[weighted_insertions, weighted_deletions, weighted_substitutions]
97-
)
98-
werp_result = weighted_errors / m
129+
# Non-numpy fallback (assume [wer, ld, m, ...])
130+
weighted_insertions = b[3] * insertions_weight
131+
weighted_deletions = b[4] * deletions_weight
132+
weighted_substitutions = b[5] * substitutions_weight
133+
m = b[2]
134+
135+
weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
136+
werp_result = float(weighted_errors / m) if m else 0.0
99137
return werp_result

werpy/werps.py

Lines changed: 54 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -71,28 +71,60 @@ def werps(
7171
except (ValueError, AttributeError, ZeroDivisionError) as err:
7272
print(f"{type(err).__name__}: {str(err)}")
7373
return None
74-
if isinstance(word_error_rate_breakdown[0], np.ndarray):
75-
transform_word_error_rate_breakdown = np.transpose(
76-
word_error_rate_breakdown.tolist()
77-
)
78-
weighted_insertions = transform_word_error_rate_breakdown[3] * insertions_weight
79-
weighted_deletions = transform_word_error_rate_breakdown[4] * deletions_weight
80-
weighted_substitutions = (
81-
transform_word_error_rate_breakdown[5] * substitutions_weight
82-
)
83-
m = transform_word_error_rate_breakdown[2]
84-
else:
85-
weighted_insertions = word_error_rate_breakdown[3] * insertions_weight
86-
weighted_deletions = word_error_rate_breakdown[4] * deletions_weight
87-
weighted_substitutions = word_error_rate_breakdown[5] * substitutions_weight
88-
m = word_error_rate_breakdown[2]
8974

90-
weighted_errors = sum(
91-
(weighted_insertions, weighted_deletions, weighted_substitutions)
92-
)
93-
werps_result = weighted_errors / m
75+
b = word_error_rate_breakdown
76+
77+
# Unwrap 0-D container
78+
if isinstance(b, np.ndarray) and b.ndim == 0:
79+
b = b.item()
80+
81+
if isinstance(b, np.ndarray):
82+
if b.ndim == 2:
83+
# True 2-D numeric batch
84+
t = b.T
85+
weighted_insertions = t[3] * insertions_weight
86+
weighted_deletions = t[4] * deletions_weight
87+
weighted_substitutions = t[5] * substitutions_weight
88+
m = t[2]
89+
weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
90+
werps_result = (weighted_errors / m).tolist()
91+
92+
elif b.ndim == 1:
93+
# Could be either:
94+
# (a) single example row vector, or
95+
# (b) object array of per-example vectors
96+
first = b[0] if b.size else None
9497

95-
if isinstance(word_error_rate_breakdown[0], float):
96-
return werps_result
98+
if isinstance(first, (np.ndarray, list, tuple)):
99+
# Batch stored as 1-D object array of per-example vectors (ragged fields exist)
100+
werps_result = []
101+
for r in b:
102+
rr = r.tolist() if isinstance(r, np.ndarray) else r
103+
w_ins = float(rr[3]) * insertions_weight
104+
w_del = float(rr[4]) * deletions_weight
105+
w_sub = float(rr[5]) * substitutions_weight
106+
m_val = float(rr[2])
107+
weighted_wer = (w_ins + w_del + w_sub) / m_val if m_val else 0.0
108+
werps_result.append(weighted_wer)
109+
else:
110+
# Single example vector
111+
weighted_insertions = b[3] * insertions_weight
112+
weighted_deletions = b[4] * deletions_weight
113+
weighted_substitutions = b[5] * substitutions_weight
114+
m = b[2]
115+
weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
116+
werps_result = float(weighted_errors / m) if m else 0.0
117+
118+
else:
119+
raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
120+
121+
else:
122+
# Non-numpy fallback (assume [wer, ld, m, ...])
123+
weighted_insertions = b[3] * insertions_weight
124+
weighted_deletions = b[4] * deletions_weight
125+
weighted_substitutions = b[5] * substitutions_weight
126+
m = b[2]
127+
weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
128+
werps_result = float(weighted_errors / m) if m else 0.0
97129

98-
return werps_result.tolist()
130+
return werps_result

werpy/wers.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,40 @@ def wers(reference, hypothesis):
5252
except (ValueError, AttributeError, ZeroDivisionError) as err:
5353
print(f"{type(err).__name__}: {str(err)}")
5454
return None
55-
if isinstance(word_error_rate_breakdown[0], np.ndarray):
56-
transform_word_error_rate_breakdown = np.transpose(
57-
word_error_rate_breakdown.tolist()
58-
)
59-
wers_result = transform_word_error_rate_breakdown[0].tolist()
55+
56+
b = word_error_rate_breakdown
57+
58+
# Unwrap 0-D container
59+
if isinstance(b, np.ndarray) and b.ndim == 0:
60+
b = b.item()
61+
62+
if isinstance(b, np.ndarray):
63+
if b.ndim == 2:
64+
# True 2-D numeric batch
65+
t = b.T
66+
wers_result = t[0].tolist()
67+
68+
elif b.ndim == 1:
69+
# Could be either:
70+
# (a) single example row vector, or
71+
# (b) object array of per-example vectors
72+
first = b[0] if b.size else None
73+
74+
if isinstance(first, (np.ndarray, list, tuple)):
75+
# Batch stored as 1-D object array of per-example vectors (ragged fields exist)
76+
wers_result = []
77+
for r in b:
78+
rr = r.tolist() if isinstance(r, np.ndarray) else r
79+
wers_result.append(float(rr[0]))
80+
else:
81+
# Single example vector
82+
wers_result = float(b[0])
83+
84+
else:
85+
raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
86+
6087
else:
61-
wers_result = word_error_rate_breakdown[0]
88+
# Non-numpy fallback (assume [wer, ld, m, ...])
89+
wers_result = float(b[0])
90+
6291
return wers_result

0 commit comments

Comments
 (0)