Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
.PHONY: install pre-commit test help
.PHONY: install lint test help

install:
@echo "Installing from source..."
pip install -e src/[dev]

pre-commit:
lint:
@echo "Running pre-commit..."
pre-commit install
pre-commit run --all
Expand All @@ -19,6 +19,6 @@ test:
help:
@echo "Available targets:"
@echo " install Install from source with developer tools."
@echo " pre-commit Run pre-commit."
@echo " lint Run pre-commit."
@echo " test Run tests."
@echo " help Show this help message."
12 changes: 3 additions & 9 deletions src/valor_lite/classification/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,16 +545,12 @@ def iterate_values_with_tables(self, datums: pc.Expression | None = None):
matches = tbl["match"].to_numpy()
yield ids, scores, winners, matches, tbl

def compute_rocauc(
self, datums: pc.Expression | None = None
) -> dict[MetricType, list[Metric]]:
def compute_rocauc(self) -> dict[MetricType, list[Metric]]:
"""
Compute ROCAUC.

Parameters
----------
datums : pyarrow.compute.Expression, optional
Option to filter datums by an expression.
This function does not support direct filtering. To perform evaluation over a filtered
set you must first create a new evaluator using `Evaluator.filter`.

Returns
-------
Expand All @@ -567,7 +563,6 @@ def compute_rocauc(
label_counts = extract_groundtruth_count_per_label(
reader=self._reader,
number_of_labels=len(self._index_to_label),
datums=datums,
)

prev = np.zeros((n_labels, 2), dtype=np.uint64)
Expand All @@ -577,7 +572,6 @@ def compute_rocauc(
"cumulative_fp",
"cumulative_tp",
],
filter=datums,
):
rocauc, prev = compute_rocauc(
rocauc=rocauc,
Expand Down
46 changes: 46 additions & 0 deletions tests/classification/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,3 +636,49 @@ def test_filtering_six_classifications_inline(
assert m in expected_metrics
for m in expected_metrics:
assert m in actual_metrics


def test_filtering_remove_all(
    loader: Loader,
    six_classifications: list[Classification],
    tmp_path: Path,
):
    """Evaluate with a filter expression that matches no datums.

    Every compute path that accepts `datums` should handle an empty
    selection gracefully (zeroed counts, empty examples) rather than
    raising, while `compute_rocauc` — which no longer accepts a
    `datums` kwarg — should raise a TypeError.
    """
    loader.add_data(six_classifications)
    evaluator = loader.finalize()

    # Expression that matches no rows in the dataset.
    datums = pc.field("datum_uid") == "does_not_exist"

    # test evaluation
    base_metrics = evaluator.compute_precision_recall(datums=datums)
    # Direct filtering was removed from `compute_rocauc`; filtered
    # evaluation must go through `Evaluator.filter` instead.
    with pytest.raises(TypeError) as e:
        evaluator.compute_rocauc(datums=datums)  # type: ignore - testing
    # Inspect the raised exception itself (`e.value`), not the
    # ExceptionInfo wrapper, per pytest's documented usage.
    assert "unexpected keyword" in str(e.value)
    confusion = evaluator.compute_confusion_matrix(datums=datums)
    examples = evaluator.compute_examples(datums=datums)

    # Counts must be non-negative ints; all other metrics must be
    # well-formed floats in [0, 1] even over an empty selection.
    for k, mlist in base_metrics.items():
        for m in mlist:
            if k == MetricType.Counts:
                assert isinstance(m.value, dict)
                for v in m.value.values():
                    assert isinstance(v, int)
                    assert v >= 0
            else:
                assert isinstance(m.value, float)
                assert m.value <= 1.0
                assert m.value >= 0.0
    # Confusion-matrix cells and unmatched ground truths must all be
    # non-negative integer counts.
    for cm in confusion:
        assert isinstance(cm.value, dict)
        for row in cm.value["confusion_matrix"].values():
            for v in row.values():
                assert isinstance(v, int)
                assert v >= 0
        for v in cm.value["unmatched_ground_truths"].values():
            assert isinstance(v, int)
            assert v >= 0
    # With no matching datums, every list-valued example field is empty.
    for example in examples:
        assert isinstance(example, dict)
        for v in example.values():
            if isinstance(v, list):
                assert len(v) == 0
48 changes: 48 additions & 0 deletions tests/classification/test_rocauc.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,51 @@ def test_rocauc_with_tabular_example(
assert m in expected_metrics
for m in expected_metrics:
assert m in actual_metrics


def test_rocauc_single_classification(loader: Loader):
    """Check ROCAUC and mROCAUC for a single perfectly-scored classification.

    With one datum whose ground truth receives score 1.0, both per-label
    ROCAUC values and the mean ROCAUC degenerate to 0.0.
    """
    classifications = [
        Classification(
            uid="uid",
            groundtruth="dog",
            predictions=["dog", "cat"],
            scores=[1.0, 0.0],
        )
    ]
    loader.add_data(classifications)
    evaluator = loader.finalize()

    metrics = evaluator.compute_rocauc()

    # test ROCAUC
    expected_rocauc = [
        {
            "type": "ROCAUC",
            "value": 0.0,
            "parameters": {"label": label},
        }
        for label in ("dog", "cat")
    ]
    actual_rocauc = [metric.to_dict() for metric in metrics[MetricType.ROCAUC]]
    for metric in actual_rocauc:
        assert metric in expected_rocauc
    for metric in expected_rocauc:
        assert metric in actual_rocauc

    # test mROCAUC
    expected_mrocauc = [
        {"type": "mROCAUC", "value": 0.0, "parameters": {}},
    ]
    actual_mrocauc = [metric.to_dict() for metric in metrics[MetricType.mROCAUC]]
    for metric in actual_mrocauc:
        assert metric in expected_mrocauc
    for metric in expected_mrocauc:
        assert metric in actual_mrocauc