Description
Dear authors,

First of all, thank you for your great work and for open-sourcing your code. I recently came across the AUPRO implementation you provide and compared it with another commonly used implementation, available at https://github.com/zqhang/AnomalyCLIP/blob/071263c1c7c1dd2a0edc5173c6e651a4621da728/metrics.py#L5. Here is the Python snippet I used for the comparison:
```python
from bisect import bisect
import time

import numpy as np
from scipy.ndimage import label
from skimage import measure
from sklearn.metrics import auc


# ref: https://github.com/gudovskiy/cflow-ad/blob/master/train.py
def cal_pro_score(masks, amaps, max_step=200, expect_fpr=0.3):
    binary_amaps = np.zeros_like(amaps, dtype=bool)
    min_th, max_th = amaps.min(), amaps.max()
    delta = (max_th - min_th) / max_step
    pros, fprs, ths = [], [], []
    for th in np.arange(min_th, max_th, delta):
        binary_amaps[amaps <= th], binary_amaps[amaps > th] = 0, 1
        pro = []
        for binary_amap, mask in zip(binary_amaps, masks):
            for region in measure.regionprops(measure.label(mask)):
                tp_pixels = binary_amap[region.coords[:, 0], region.coords[:, 1]].sum()
                pro.append(tp_pixels / region.area)
        inverse_masks = 1 - masks
        fp_pixels = np.logical_and(inverse_masks, binary_amaps).sum()
        fpr = fp_pixels / inverse_masks.sum()
        pros.append(np.array(pro).mean())
        fprs.append(fpr)
        ths.append(th)
    pros, fprs, ths = np.array(pros), np.array(fprs), np.array(ths)
    idxes = fprs < expect_fpr
    fprs = fprs[idxes]
    fprs = (fprs - fprs.min()) / (fprs.max() - fprs.min())
    pro_auc = auc(fprs, pros[idxes])
    return pro_auc
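
# Note on thresholding: cal_pro_score above sweeps `max_step` thresholds spaced
# uniformly over the raw score range [amaps.min(), amaps.max()], whereas
# calculate_au_pro below places its thresholds at evenly spaced rank positions
# of the sorted OK-pixel scores. The two methods therefore integrate over
# different threshold grids, which by itself can produce small differences in
# the resulting AUPRO values.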

# ref: https://github.com/M-3LAB/open-iad/blob/main/metric/mvtec3d/au_pro.py#L205
__all__ = ['GroundTruthComponent', 'trapezoid', 'collect_anomaly_scores', 'compute_pro', 'calculate_au_pro']

class GroundTruthComponent:
    def __init__(self, anomaly_scores):
        self.anomaly_scores = anomaly_scores.copy()
        self.anomaly_scores.sort()
        self.index = 0
        self.last_threshold = None

    def compute_overlap(self, threshold):
        if self.last_threshold is not None:
            assert self.last_threshold <= threshold
        while self.index < len(self.anomaly_scores) and self.anomaly_scores[self.index] <= threshold:
            self.index += 1
        return 1.0 - self.index / len(self.anomaly_scores)
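
# Note: compute_overlap is designed to be called with non-decreasing
# thresholds (the assert documents this contract, although last_threshold is
# never actually updated here), so self.index only ever moves forward through
# the sorted scores. This amortized scan is a large part of why this
# implementation is fast: each component's scores are traversed once across
# all thresholds instead of being re-thresholded from scratch every time.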

def trapezoid(x, y, x_max=None):
    x = np.array(x)
    y = np.array(y)
    finite_mask = np.logical_and(np.isfinite(x), np.isfinite(y))
    if not finite_mask.all():
        print("""WARNING: Not all x and y values passed to trapezoid are finite. Will continue with only the finite values.""")
    x = x[finite_mask]
    y = y[finite_mask]

    correction = 0.0
    if x_max is not None:
        if x_max not in x:
            ins = bisect(x, x_max)
            assert 0 < ins < len(x)
            y_interp = y[ins - 1] + ((y[ins] - y[ins - 1]) * (x_max - x[ins - 1]) / (x[ins] - x[ins - 1]))
            correction = 0.5 * (y_interp + y[ins - 1]) * (x_max - x[ins - 1])
        mask = x <= x_max
        x = x[mask]
        y = y[mask]

    return np.sum(0.5 * (y[1:] + y[:-1]) * (x[1:] - x[:-1])) + correction
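
# Note: when x_max falls between two sampled x values, trapezoid linearly
# interpolates y at x_max and adds the partial trapezoid up to x_max as
# `correction`, so the integration limit (e.g. FPR = 0.3) is honored exactly
# instead of being rounded to the nearest sampled FPR.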

def collect_anomaly_scores(anomaly_maps, ground_truth_maps):
    assert len(anomaly_maps) == len(ground_truth_maps)
    ground_truth_components = []
    anomaly_scores_ok_pixels = np.zeros(len(ground_truth_maps) * ground_truth_maps[0].size)
    structure = np.ones((3, 3), dtype=int)

    ok_index = 0
    for gt_map, prediction in zip(ground_truth_maps, anomaly_maps):
        labeled, n_components = label(gt_map, structure)
        num_ok_pixels = len(prediction[labeled == 0])
        anomaly_scores_ok_pixels[ok_index:ok_index + num_ok_pixels] = prediction[labeled == 0].copy()
        ok_index += num_ok_pixels
        for k in range(n_components):
            component_scores = prediction[labeled == (k + 1)]
            ground_truth_components.append(GroundTruthComponent(component_scores))

    anomaly_scores_ok_pixels = np.resize(anomaly_scores_ok_pixels, ok_index)
    anomaly_scores_ok_pixels.sort()
    return ground_truth_components, anomaly_scores_ok_pixels

def compute_pro(anomaly_maps, ground_truth_maps, num_thresholds):
    ground_truth_components, anomaly_scores_ok_pixels = collect_anomaly_scores(anomaly_maps, ground_truth_maps)
    threshold_positions = np.linspace(0, len(anomaly_scores_ok_pixels) - 1, num=num_thresholds, dtype=int)

    fprs = [1.0]
    pros = [1.0]
    for pos in threshold_positions:
        threshold = anomaly_scores_ok_pixels[pos]
        fpr = 1.0 - (pos + 1) / len(anomaly_scores_ok_pixels)
        pro = 0.0
        for component in ground_truth_components:
            pro += component.compute_overlap(threshold)
        pro /= len(ground_truth_components)
        fprs.append(fpr)
        pros.append(pro)

    fprs = fprs[::-1]
    pros = pros[::-1]
    return fprs, pros
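
# Note: the curve is seeded with (fpr, pro) = (1.0, 1.0), i.e. a threshold
# below every anomaly score, and the lists are reversed at the end so that
# FPR is ascending, as trapezoid expects.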

def calculate_au_pro(gts, predictions, integration_limit=0.3, num_thresholds=200):
    # Compute the PRO curve.
    pro_curve = compute_pro(anomaly_maps=predictions, ground_truth_maps=gts, num_thresholds=num_thresholds)

    # Compute the area under the PRO curve.
    au_pro = trapezoid(pro_curve[0], pro_curve[1], x_max=integration_limit)
    au_pro /= integration_limit

    # Return the evaluation metrics.
    return au_pro, pro_curve
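
# Note: dividing by `integration_limit` rescales the area so that a perfect
# detector scores 1.0 regardless of the FPR limit. cal_pro_score instead
# min-max normalizes the retained FPR values to [0, 1], which is not the same
# rescaling and is another subtle source of disagreement between the two
# scores.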

def test_pro_score(masks, amaps):
    start_gudovskiy = time.time()
    pro_auc_gudovskiy = cal_pro_score(masks, amaps, max_step=200, expect_fpr=0.3)
    end_gudovskiy = time.time()
    gudovskiy_duration = end_gudovskiy - start_gudovskiy
    print(f"gudovskiy execution time: {gudovskiy_duration:.4f} seconds")

    start_openiad = time.time()
    pro_auc_openiad = calculate_au_pro(masks, amaps, integration_limit=0.3, num_thresholds=200)[0]
    end_openiad = time.time()
    openiad_duration = end_openiad - start_openiad
    print(f"openiad execution time: {openiad_duration:.4f} seconds")

    # assert np.isclose(pro_auc_gudovskiy, pro_auc_openiad), f"Results differ: gudovskiy={pro_auc_gudovskiy}, openiad={pro_auc_openiad}"
    print(f"Test passed: gudovskiy={pro_auc_gudovskiy}, openiad={pro_auc_openiad}")

if __name__ == "__main__":
    # Example usage (with small random data for testing)
    num_sam = 25

    masks = np.random.randint(0, 2, (num_sam, 256, 256))  # Binary masks
    amaps = np.random.rand(num_sam, 256, 256)  # Anomaly maps
    test_pro_score(masks, amaps)

    masks = np.random.randint(0, 2, (num_sam, 64, 64))  # Binary masks
    amaps = np.random.rand(num_sam, 64, 64)  # Anomaly maps
    test_pro_score(masks, amaps)
```
The results of this comparison were quite interesting.
In particular, the implementation you provide is orders of magnitude faster than the alternative. However, I noticed that the two evaluations disagree slightly, and the outputs are inconsistent across runs.
Here are some sample comparison outputs:
run 1

```
gudovskiy execution time: 107.9296 seconds
openiad execution time: 1.0519 seconds
Test passed: gudovskiy=0.1527266103452964, openiad=0.15258837447609364
gudovskiy execution time: 10.1126 seconds
openiad execution time: 0.0717 seconds
Test passed: gudovskiy=0.1431284037814238, openiad=0.14169722566543597
```

run 2

```
gudovskiy execution time: 90.9615 seconds
openiad execution time: 1.0264 seconds
Test passed: gudovskiy=0.15251654329367212, openiad=0.15226420621476355
gudovskiy execution time: 8.4566 seconds
openiad execution time: 0.0695 seconds
Test passed: gudovskiy=0.15303256113352123, openiad=0.15058530542983684
```

run 3

```
gudovskiy execution time: 95.9508 seconds
openiad execution time: 0.9868 seconds
Test passed: gudovskiy=0.15314727468200867, openiad=0.15087067803750034
gudovskiy execution time: 8.0690 seconds
openiad execution time: 0.1248 seconds
Test passed: gudovskiy=0.17340418943755034, openiad=0.17253984597942126
```
Although the differences are generally small, they seem to vary slightly across runs even under the same settings.
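Part of this run-to-run variation is presumably expected, since the test script above draws fresh random masks and anomaly maps on every invocation. To compare only the two implementations, I would fix the seed first; a minimal sketch (assuming NumPy's global RNG is the only source of randomness here):

```python
import numpy as np

# Fix NumPy's global RNG so every run sees identical masks/amaps. With the
# data held constant, any remaining gap between the two scores comes from the
# implementations themselves, not from randomness in the inputs.
np.random.seed(0)

num_sam = 25
masks = np.random.randint(0, 2, (num_sam, 64, 64))  # binary ground-truth masks
amaps = np.random.rand(num_sam, 64, 64)             # anomaly score maps

test_pro_score(masks, amaps)  # defined in the snippet above
```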
Beyond the randomness of the inputs, could the gap between the two implementations be due to non-deterministic behavior somewhere in the evaluation pipeline (e.g., floating-point precision, sorting instability, or threshold discretization)?
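To probe the threshold-discretization hypothesis in particular, one could sweep the grid resolution of both methods on the same seeded data and check whether the two scores converge as the grids get finer; a rough sketch reusing the functions and data from above:

```python
# Sweep the number of thresholds in both implementations; if the gap shrinks
# as the grids get finer, threshold discretization is the likely culprit.
for n in (100, 200, 500, 1000):
    score_gudovskiy = cal_pro_score(masks, amaps, max_step=n, expect_fpr=0.3)
    score_openiad = calculate_au_pro(masks, amaps, integration_limit=0.3, num_thresholds=n)[0]
    print(f"n={n}: gudovskiy={score_gudovskiy:.6f}, openiad={score_openiad:.6f}, "
          f"diff={abs(score_gudovskiy - score_openiad):.6f}")
```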
Would you happen to have any insight into what might cause these inconsistencies? Or, if you are not the author, do you know how I could contact the authors? Given how much faster this implementation is, it has the potential to become the de facto tool for AUPRO evaluation, especially if these inconsistencies can be clarified or minimized.
Thanks again for your time and I look forward to hearing your thoughts!
Regards,
A. Salehi