fix(wer): resolve IndexError when processing single string inputs

rossarmstrong · rossarmstrong · commit 19222cc87c6a · 2025-12-14T22:07:22.000+11:00
diff --git a/werpy/summary.py b/werpy/summary.py
@@ -55,10 +55,41 @@ def summary(reference, hypothesis) -> pd.DataFrame | None:
     except (ValueError, AttributeError, ZeroDivisionError) as err:
         print(f"{type(err).__name__}: {str(err)}")
         return None
-    if isinstance(word_error_rate_breakdown[0], np.ndarray):
-        word_error_rate_breakdown = word_error_rate_breakdown.tolist()
+
+    b = word_error_rate_breakdown
+
+    # Unwrap 0-D container
+    if isinstance(b, np.ndarray) and b.ndim == 0:
+        b = b.item()
+
+    if isinstance(b, np.ndarray):
+        if b.ndim == 2:
+            # True 2-D numeric batch
+            word_error_rate_breakdown = b.tolist()
+
+        elif b.ndim == 1:
+            # Could be either:
+            # (a) single example row vector, or
+            # (b) object array of per-example vectors
+            first = b[0] if b.size else None
+
+            if isinstance(first, (np.ndarray, list, tuple)):
+                # Batch stored as 1-D object array of per-example vectors (ragged fields exist)
+                word_error_rate_breakdown = []
+                for r in b:
+                    rr = r.tolist() if isinstance(r, np.ndarray) else r
+                    word_error_rate_breakdown.append(rr)
+            else:
+                # Single example vector - wrap in list for DataFrame
+                word_error_rate_breakdown = [b.tolist()]
+
+        else:
+            raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
+
     else:
-        word_error_rate_breakdown = [word_error_rate_breakdown.tolist()]
+        # Non-numpy fallback (assume [wer, ld, m, ...])
+        word_error_rate_breakdown = [b.tolist() if hasattr(b, 'tolist') else b]
+
     columns = [
         "wer",
         "ld",
diff --git a/werpy/summaryp.py b/werpy/summaryp.py
@@ -69,29 +69,66 @@ def summaryp(
     except (ValueError, AttributeError, ZeroDivisionError) as err:
         print(f"{type(err).__name__}: {str(err)}")
         return None
-    if isinstance(word_error_rate_breakdown[0], np.ndarray):
-        word_error_rate_breakdown = word_error_rate_breakdown.tolist()
-        transform_word_error_rate_breakdown = np.transpose(word_error_rate_breakdown)
-        weighted_insertions = transform_word_error_rate_breakdown[3] * insertions_weight
-        weighted_deletions = transform_word_error_rate_breakdown[4] * deletions_weight
-        weighted_substitutions = (
-            transform_word_error_rate_breakdown[5] * substitutions_weight
-        )
-        m = transform_word_error_rate_breakdown[2]
-        weighted_errors = sum(
-            (weighted_insertions, weighted_deletions, weighted_substitutions)
-        )
-        werps_result = (weighted_errors / m).tolist()
+
+    b = word_error_rate_breakdown
+
+    # Unwrap 0-D container
+    if isinstance(b, np.ndarray) and b.ndim == 0:
+        b = b.item()
+
+    if isinstance(b, np.ndarray):
+        if b.ndim == 2:
+            # True 2-D numeric batch
+            word_error_rate_breakdown = b.tolist()
+            t = b.T
+            weighted_insertions = t[3] * insertions_weight
+            weighted_deletions = t[4] * deletions_weight
+            weighted_substitutions = t[5] * substitutions_weight
+            m = t[2]
+            weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
+            werps_result = (weighted_errors / m).tolist()
+
+        elif b.ndim == 1:
+            # Could be either:
+            # (a) single example row vector, or
+            # (b) object array of per-example vectors
+            first = b[0] if b.size else None
+
+            if isinstance(first, (np.ndarray, list, tuple)):
+                # Batch stored as 1-D object array of per-example vectors (ragged fields exist)
+                word_error_rate_breakdown = []
+                werps_result = []
+                for r in b:
+                    rr = r.tolist() if isinstance(r, np.ndarray) else r
+                    word_error_rate_breakdown.append(rr)
+                    w_ins = float(rr[3]) * insertions_weight
+                    w_del = float(rr[4]) * deletions_weight
+                    w_sub = float(rr[5]) * substitutions_weight
+                    m_val = float(rr[2])
+                    weighted_wer = (w_ins + w_del + w_sub) / m_val if m_val else 0.0
+                    werps_result.append(weighted_wer)
+            else:
+                # Single example vector - wrap in list for DataFrame
+                word_error_rate_breakdown = [b.tolist()]
+                weighted_insertions = b[3] * insertions_weight
+                weighted_deletions = b[4] * deletions_weight
+                weighted_substitutions = b[5] * substitutions_weight
+                m = b[2]
+                weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
+                werps_result = float(weighted_errors / m) if m else 0.0
+
+        else:
+            raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
+
     else:
-        word_error_rate_breakdown = [word_error_rate_breakdown.tolist()]
-        weighted_insertions = word_error_rate_breakdown[0][3] * insertions_weight
-        weighted_deletions = word_error_rate_breakdown[0][4] * deletions_weight
-        weighted_substitutions = word_error_rate_breakdown[0][5] * substitutions_weight
-        m = word_error_rate_breakdown[0][2]
-        weighted_errors = sum(
-            (weighted_insertions, weighted_deletions, weighted_substitutions)
-        )
-        werps_result = weighted_errors / m
+        # Non-numpy fallback (assume [wer, ld, m, ...])
+        word_error_rate_breakdown = [b.tolist() if hasattr(b, 'tolist') else b]
+        weighted_insertions = b[3] * insertions_weight
+        weighted_deletions = b[4] * deletions_weight
+        weighted_substitutions = b[5] * substitutions_weight
+        m = b[2]
+        weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
+        werps_result = float(weighted_errors / m) if m else 0.0
 
     columns = [
         "wer",
diff --git a/werpy/wer.py b/werpy/wer.py
@@ -59,13 +59,43 @@ def wer(reference, hypothesis) -> float | np.float64 | None:
     except (ValueError, AttributeError, ZeroDivisionError) as err:
         print(f"{type(err).__name__}: {str(err)}")
         return None
-    if isinstance(word_error_rate_breakdown[0], np.ndarray):
-        transform_word_error_rate_breakdown = np.transpose(
-            word_error_rate_breakdown.tolist()
-        )
-        wer_result = (np.sum(transform_word_error_rate_breakdown[1])) / (
-            np.sum(transform_word_error_rate_breakdown[2])
-        )
+
+    b = word_error_rate_breakdown
+
+    # Unwrap 0-D container
+    if isinstance(b, np.ndarray) and b.ndim == 0:
+        b = b.item()
+
+    if isinstance(b, np.ndarray):
+        if b.ndim == 2:
+            # True 2-D numeric batch
+            t = b.T
+            wer_result = float(np.sum(t[1]) / np.sum(t[2]))
+
+        elif b.ndim == 1:
+            # Could be either:
+            # (a) single example row vector, or
+            # (b) object array of per-example vectors
+            first = b[0] if b.size else None
+
+            if isinstance(first, (np.ndarray, list, tuple)):
+                # Batch stored as 1-D object array of per-example vectors (ragged fields exist)
+                total_ld = 0.0
+                total_m = 0.0
+                for r in b:
+                    rr = r.tolist() if isinstance(r, np.ndarray) else r
+                    total_ld += float(rr[1])
+                    total_m += float(rr[2])
+                wer_result = float(total_ld / total_m) if total_m else 0.0
+            else:
+                # Single example vector
+                wer_result = float(b[0])
+
+        else:
+            raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
+
     else:
-        wer_result = word_error_rate_breakdown[0]
+        # Non-numpy fallback (assume [wer, ld, m, ...])
+        wer_result = float(b[0])
+
     return wer_result
diff --git a/werpy/werp.py b/werpy/werp.py
@@ -77,23 +77,61 @@ def werp(
     except (ValueError, AttributeError, ZeroDivisionError) as err:
         print(f"{type(err).__name__}: {str(err)}")
         return None
-    if isinstance(word_error_rate_breakdown[0], np.ndarray):
-        transform_word_error_rate_breakdown = np.transpose(
-            word_error_rate_breakdown.tolist()
-        )
-        weighted_insertions = transform_word_error_rate_breakdown[3] * insertions_weight
-        weighted_deletions = transform_word_error_rate_breakdown[4] * deletions_weight
-        weighted_substitutions = (
-            transform_word_error_rate_breakdown[5] * substitutions_weight
-        )
-        m = np.sum(transform_word_error_rate_breakdown[2])
+
+    b = word_error_rate_breakdown
+
+    # Unwrap 0-D container
+    if isinstance(b, np.ndarray) and b.ndim == 0:
+        b = b.item()
+
+    if isinstance(b, np.ndarray):
+        if b.ndim == 2:
+            # True 2-D numeric batch
+            t = b.T
+            weighted_insertions = np.sum(t[3]) * insertions_weight
+            weighted_deletions = np.sum(t[4]) * deletions_weight
+            weighted_substitutions = np.sum(t[5]) * substitutions_weight
+            m = np.sum(t[2])
+
+        elif b.ndim == 1:
+            # Could be either:
+            # (a) single example row vector, or
+            # (b) object array of per-example vectors
+            first = b[0] if b.size else None
+
+            if isinstance(first, (np.ndarray, list, tuple)):
+                # Batch stored as 1-D object array of per-example vectors (ragged fields exist)
+                total_insertions = 0.0
+                total_deletions = 0.0
+                total_substitutions = 0.0
+                total_m = 0.0
+                for r in b:
+                    rr = r.tolist() if isinstance(r, np.ndarray) else r
+                    total_insertions += float(rr[3])
+                    total_deletions += float(rr[4])
+                    total_substitutions += float(rr[5])
+                    total_m += float(rr[2])
+                weighted_insertions = total_insertions * insertions_weight
+                weighted_deletions = total_deletions * deletions_weight
+                weighted_substitutions = total_substitutions * substitutions_weight
+                m = total_m
+            else:
+                # Single example vector
+                weighted_insertions = b[3] * insertions_weight
+                weighted_deletions = b[4] * deletions_weight
+                weighted_substitutions = b[5] * substitutions_weight
+                m = b[2]
+
+        else:
+            raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
+
     else:
-        weighted_insertions = word_error_rate_breakdown[3] * insertions_weight
-        weighted_deletions = word_error_rate_breakdown[4] * deletions_weight
-        weighted_substitutions = word_error_rate_breakdown[5] * substitutions_weight
-        m = np.sum(word_error_rate_breakdown[2])
-    weighted_errors = np.sum(
-        [weighted_insertions, weighted_deletions, weighted_substitutions]
-    )
-    werp_result = weighted_errors / m
+        # Non-numpy fallback (assume [wer, ld, m, ...])
+        weighted_insertions = b[3] * insertions_weight
+        weighted_deletions = b[4] * deletions_weight
+        weighted_substitutions = b[5] * substitutions_weight
+        m = b[2]
+
+    weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
+    werp_result = float(weighted_errors / m) if m else 0.0
     return werp_result
diff --git a/werpy/werps.py b/werpy/werps.py
@@ -71,28 +71,60 @@ def werps(
     except (ValueError, AttributeError, ZeroDivisionError) as err:
         print(f"{type(err).__name__}: {str(err)}")
         return None
-    if isinstance(word_error_rate_breakdown[0], np.ndarray):
-        transform_word_error_rate_breakdown = np.transpose(
-            word_error_rate_breakdown.tolist()
-        )
-        weighted_insertions = transform_word_error_rate_breakdown[3] * insertions_weight
-        weighted_deletions = transform_word_error_rate_breakdown[4] * deletions_weight
-        weighted_substitutions = (
-            transform_word_error_rate_breakdown[5] * substitutions_weight
-        )
-        m = transform_word_error_rate_breakdown[2]
-    else:
-        weighted_insertions = word_error_rate_breakdown[3] * insertions_weight
-        weighted_deletions = word_error_rate_breakdown[4] * deletions_weight
-        weighted_substitutions = word_error_rate_breakdown[5] * substitutions_weight
-        m = word_error_rate_breakdown[2]
 
-    weighted_errors = sum(
-        (weighted_insertions, weighted_deletions, weighted_substitutions)
-    )
-    werps_result = weighted_errors / m
+    b = word_error_rate_breakdown
+
+    # Unwrap 0-D container
+    if isinstance(b, np.ndarray) and b.ndim == 0:
+        b = b.item()
+
+    if isinstance(b, np.ndarray):
+        if b.ndim == 2:
+            # True 2-D numeric batch
+            t = b.T
+            weighted_insertions = t[3] * insertions_weight
+            weighted_deletions = t[4] * deletions_weight
+            weighted_substitutions = t[5] * substitutions_weight
+            m = t[2]
+            weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
+            werps_result = (weighted_errors / m).tolist()
+
+        elif b.ndim == 1:
+            # Could be either:
+            # (a) single example row vector, or
+            # (b) object array of per-example vectors
+            first = b[0] if b.size else None
 
-    if isinstance(word_error_rate_breakdown[0], float):
-        return werps_result
+            if isinstance(first, (np.ndarray, list, tuple)):
+                # Batch stored as 1-D object array of per-example vectors (ragged fields exist)
+                werps_result = []
+                for r in b:
+                    rr = r.tolist() if isinstance(r, np.ndarray) else r
+                    w_ins = float(rr[3]) * insertions_weight
+                    w_del = float(rr[4]) * deletions_weight
+                    w_sub = float(rr[5]) * substitutions_weight
+                    m_val = float(rr[2])
+                    weighted_wer = (w_ins + w_del + w_sub) / m_val if m_val else 0.0
+                    werps_result.append(weighted_wer)
+            else:
+                # Single example vector
+                weighted_insertions = b[3] * insertions_weight
+                weighted_deletions = b[4] * deletions_weight
+                weighted_substitutions = b[5] * substitutions_weight
+                m = b[2]
+                weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
+                werps_result = float(weighted_errors / m) if m else 0.0
+
+        else:
+            raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
+
+    else:
+        # Non-numpy fallback (assume [wer, ld, m, ...])
+        weighted_insertions = b[3] * insertions_weight
+        weighted_deletions = b[4] * deletions_weight
+        weighted_substitutions = b[5] * substitutions_weight
+        m = b[2]
+        weighted_errors = weighted_insertions + weighted_deletions + weighted_substitutions
+        werps_result = float(weighted_errors / m) if m else 0.0
 
-    return werps_result.tolist()
+    return werps_result
diff --git a/werpy/wers.py b/werpy/wers.py
@@ -52,11 +52,40 @@ def wers(reference, hypothesis):
     except (ValueError, AttributeError, ZeroDivisionError) as err:
         print(f"{type(err).__name__}: {str(err)}")
         return None
-    if isinstance(word_error_rate_breakdown[0], np.ndarray):
-        transform_word_error_rate_breakdown = np.transpose(
-            word_error_rate_breakdown.tolist()
-        )
-        wers_result = transform_word_error_rate_breakdown[0].tolist()
+
+    b = word_error_rate_breakdown
+
+    # Unwrap 0-D container
+    if isinstance(b, np.ndarray) and b.ndim == 0:
+        b = b.item()
+
+    if isinstance(b, np.ndarray):
+        if b.ndim == 2:
+            # True 2-D numeric batch
+            t = b.T
+            wers_result = t[0].tolist()
+
+        elif b.ndim == 1:
+            # Could be either:
+            # (a) single example row vector, or
+            # (b) object array of per-example vectors
+            first = b[0] if b.size else None
+
+            if isinstance(first, (np.ndarray, list, tuple)):
+                # Batch stored as 1-D object array of per-example vectors (ragged fields exist)
+                wers_result = []
+                for r in b:
+                    rr = r.tolist() if isinstance(r, np.ndarray) else r
+                    wers_result.append(float(rr[0]))
+            else:
+                # Single example vector
+                wers_result = float(b[0])
+
+        else:
+            raise ValueError(f"Unexpected metrics output ndim: {b.ndim}")
+
     else:
-        wers_result = word_error_rate_breakdown[0]
+        # Non-numpy fallback (assume [wer, ld, m, ...])
+        wers_result = float(b[0])
+
     return wers_result