
Commit a81ade8

Merge pull request #130 from ImageMarkup/cleanup
Clean up the build and test infrastructure
2 parents cee8bc7 + 58915e3 commit a81ade8

21 files changed: +225 −200 lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
@@ -9,13 +9,13 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10']
+        python-version: ['3.13']
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           lfs: true
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install tox

.github/workflows/release.yml

Lines changed: 12 additions & 11 deletions
@@ -5,23 +5,24 @@ on:
 jobs:
   publish:
     runs-on: ubuntu-latest
+    environment: release
+    permissions:
+      id-token: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           # Tags are needed to compute the current version number
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: '3.x'
-      - name: Install tox
+          python-version: "3.x"
+      - name: Install Python build
         run: |
           pip install --upgrade pip
-          pip install tox
-      - name: Publish to PyPI
-        env:
-          TWINE_USERNAME: "__token__"
-          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
-          TWINE_NON_INTERACTIVE: "true"
+          pip install build
+      - name: Build the Python distribution
         run: |
-          tox -e release
+          python -m build
+      - name: Publish the Python distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-FROM python:3.8
+FROM python:3.13

 WORKDIR /usr/src/isic-challenge-scoring

README.md

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ Automated scoring code for the [ISIC Challenge](http://challenge.isic-archive.co

 ## Installation
 ### Python
-Python version >= 3.8 is required.
+Python version >= 3.13 is required.
 ```bash
 pip install isic-challenge-scoring
 ```

isic_challenge_scoring/__init__.py

Lines changed: 0 additions & 8 deletions
@@ -1,13 +1,5 @@
-from importlib.metadata import PackageNotFoundError, version
-
 from isic_challenge_scoring.classification import ClassificationMetric, ClassificationScore
 from isic_challenge_scoring.segmentation import SegmentationScore
 from isic_challenge_scoring.types import ScoreError

 __all__ = ['ClassificationScore', 'SegmentationScore', 'ScoreError', 'ClassificationMetric']
-
-try:
-    __version__ = version('isic-challenge-scoring')
-except PackageNotFoundError:
-    # package is not installed
-    pass
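With the `__version__` shim removed, the package no longer sets a module-level version attribute. Callers that need the installed version can still query the package metadata directly; the sketch below shows the standard-library lookup (the `get_scoring_version` helper and its `None` fallback are illustrative, not part of this commit):

```python
from importlib.metadata import PackageNotFoundError, version


def get_scoring_version() -> str | None:
    """Return the installed isic-challenge-scoring version, or None if not installed."""
    try:
        return version('isic-challenge-scoring')
    except PackageNotFoundError:
        return None


print(get_scoring_version())
```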

isic_challenge_scoring/classification.py

Lines changed: 18 additions & 16 deletions
@@ -3,7 +3,7 @@
 from dataclasses import dataclass
 import enum
 import pathlib
-from typing import Dict, TextIO, cast
+from typing import TextIO, cast

 import pandas as pd

@@ -23,7 +23,7 @@ class ClassificationMetric(enum.Enum):
 class ClassificationScore(Score):
     per_category: pd.DataFrame
     macro_average: pd.Series
-    rocs: Dict[str, pd.DataFrame]
+    rocs: dict[str, pd.DataFrame]
     aggregate: pd.Series

     def __init__(
@@ -40,28 +40,29 @@ def __init__(
                 self._category_score(
                     truth_probabilities[category],
                     prediction_probabilities[category],
-                    truth_weights.score_weight,
+                    truth_weights['score_weight'],
                     category,
                 )
                 for category in categories
             ]
         )
-        self.macro_average = self.per_category.mean(axis='index').rename(
-            'macro_average', inplace=True
-        )
+        # TODO: Fixed by https://github.com/pandas-dev/pandas-stubs/pull/1105
+        self.macro_average = self.per_category.mean(  # type: ignore[assignment]
+            axis='index'
+        ).rename('macro_average', inplace=True)
         self.rocs = {
             category: metrics.roc(
                 truth_probabilities[category],
                 prediction_probabilities[category],
-                truth_weights.score_weight,
+                truth_weights['score_weight'],
             )
             for category in categories
         }
         # Multi-category aggregate metrics
         self.aggregate = pd.Series(
             {
                 'balanced_accuracy': metrics.balanced_multiclass_accuracy(
-                    truth_probabilities, prediction_probabilities, truth_weights.score_weight
+                    truth_probabilities, prediction_probabilities, truth_weights['score_weight']
                 )
             },
             index=['balanced_accuracy'],
@@ -71,29 +72,29 @@ def __init__(
         if target_metric == ClassificationMetric.BALANCED_ACCURACY:
             self.overall = self.aggregate.at['balanced_accuracy']
             self.validation = metrics.balanced_multiclass_accuracy(
-                truth_probabilities, prediction_probabilities, truth_weights.validation_weight
+                truth_probabilities, prediction_probabilities, truth_weights['validation_weight']
             )
         elif target_metric == ClassificationMetric.AVERAGE_PRECISION:
-            self.overall = self.macro_average['ap']
+            self.overall = self.macro_average.at['ap']
             per_category_ap = pd.Series(
                 [
                     metrics.average_precision(
                         truth_probabilities[category],
                         prediction_probabilities[category],
-                        truth_weights.validation_weight,
+                        truth_weights['validation_weight'],
                     )
                     for category in categories
                 ]
             )
             self.validation = per_category_ap.mean()
         elif target_metric == ClassificationMetric.AUC:
-            self.overall = self.macro_average['auc']
+            self.overall = self.macro_average.at['auc']
             per_category_auc = pd.Series(
                 [
                     metrics.auc(
                         truth_probabilities[category],
                         prediction_probabilities[category],
-                        truth_weights.validation_weight,
+                        truth_weights['validation_weight'],
                     )
                     for category in categories
                 ]
@@ -212,9 +213,10 @@ def from_file(
         prediction_file: pathlib.Path,
         target_metric: ClassificationMetric,
     ) -> ClassificationScore:
-        with truth_file.open('r') as truth_file_stream, prediction_file.open(
-            'r'
-        ) as prediction_file_stream:
+        with (
+            truth_file.open('r') as truth_file_stream,
+            prediction_file.open('r') as prediction_file_stream,
+        ):
             return cls.from_stream(
                 truth_file_stream,
                 prediction_file_stream,
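Two idioms recur in this file's changes: `truth_weights['score_weight']` bracket indexing replaces `truth_weights.score_weight` attribute access (bracket indexing names the column explicitly and is the form the pandas type stubs can resolve for arbitrary column names), and `from_file` switches to parenthesized context managers, which require Python 3.10 or later. A minimal sketch of both, using hypothetical data and file names:

```python
import pathlib

import pandas as pd

# Bracket indexing works for any column name; attribute access only works when
# the name is a valid identifier and does not shadow a DataFrame method.
truth_weights = pd.DataFrame({'score_weight': [1.0, 0.5], 'validation_weight': [1.0, 1.0]})
score_weight = truth_weights['score_weight']

# Parenthesized context managers (Python 3.10+) let several `with` items wrap
# across lines without backslashes; 'truth.csv' and 'prediction.csv' are stand-ins.
with (
    pathlib.Path('truth.csv').open('r') as truth_stream,
    pathlib.Path('prediction.csv').open('r') as prediction_stream,
):
    print(truth_stream.readline(), prediction_stream.readline())
```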

isic_challenge_scoring/confusion.py

Lines changed: 2 additions & 4 deletions
@@ -1,14 +1,12 @@
-from typing import Optional, Tuple, Union
-
 import numpy as np
 import pandas as pd


 def create_binary_confusion_matrix(
     truth_binary_values: np.ndarray,
     prediction_binary_values: np.ndarray,
-    weights: Optional[np.ndarray] = None,
-    name: Optional[Union[str, Tuple[str, ...]]] = None,
+    weights: np.ndarray | None = None,
+    name: str | tuple[str, ...] | None = None,
 ) -> pd.Series:
     # This implementation is:
     # ~30x faster than sklearn.metrics.confusion_matrix
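The new signature relies on PEP 604 union syntax (`X | None`, Python 3.10+) and PEP 585 builtin generics (`tuple[str, ...]`, Python 3.9+), so the `typing` imports are no longer needed. A small equivalence sketch:

```python
from typing import Optional, Tuple, Union

import numpy as np

# Each pair of annotations is equivalent; the second form of each needs no
# imports from `typing` on modern Python.
weights_old: Optional[np.ndarray] = None
weights_new: np.ndarray | None = None

name_old: Optional[Union[str, Tuple[str, ...]]] = None
name_new: str | tuple[str, ...] | None = None
```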

isic_challenge_scoring/load_csv.py

Lines changed: 4 additions & 4 deletions
@@ -1,12 +1,12 @@
-from typing import TextIO, Tuple
+from typing import TextIO

 import numpy as np
 import pandas as pd

 from isic_challenge_scoring.types import ScoreError


-def parse_truth_csv(csv_file_stream: TextIO) -> Tuple[pd.DataFrame, pd.DataFrame]:
+def parse_truth_csv(csv_file_stream: TextIO) -> tuple[pd.DataFrame, pd.DataFrame]:
     table = pd.read_csv(csv_file_stream, header=0)

     table.set_index('image', drop=True, inplace=True, verify_integrity=False)
@@ -87,7 +87,7 @@ def parse_csv(csv_file_stream: TextIO, categories: pd.Index) -> pd.DataFrame:
     # TODO: identify specific failed rows

     out_of_range_rows = probabilities[
-        probabilities.applymap(lambda x: x < 0.0 or x > 1.0).any(axis='columns')
+        probabilities.map(lambda x: x < 0.0 or x > 1.0).any(axis='columns')
     ].index
     if not out_of_range_rows.empty:
         raise ScoreError(
@@ -120,4 +120,4 @@ def validate_rows(

 def sort_rows(probabilities: pd.DataFrame) -> None:
     """Sort rows by labels, in-place."""
-    probabilities.sort_index(axis='rows', inplace=True)
+    probabilities.sort_index(axis='index', inplace=True)
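`DataFrame.applymap` was deprecated in pandas 2.1 in favor of `DataFrame.map`, which applies a function elementwise in the same way, so the range check keeps its behavior. A sketch with illustrative category columns (the column names and image IDs are hypothetical):

```python
import pandas as pd

probabilities = pd.DataFrame(
    {'melanoma': [0.2, 1.4], 'nevus': [0.8, -0.1]},
    index=['ISIC_0000001', 'ISIC_0000002'],
)

# DataFrame.map (pandas >= 2.1) is the elementwise replacement for applymap.
out_of_range_rows = probabilities[
    probabilities.map(lambda x: x < 0.0 or x > 1.0).any(axis='columns')
].index
print(list(out_of_range_rows))  # only ISIC_0000002 has out-of-range values
```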

isic_challenge_scoring/load_image.py

Lines changed: 7 additions & 6 deletions
@@ -1,7 +1,8 @@
+from collections.abc import Generator
 from dataclasses import dataclass, field
 import pathlib
 import re
-from typing import Generator, Match, Optional, Set
+from re import Match

 from PIL import Image, UnidentifiedImageError
 import numpy as np
@@ -16,15 +17,15 @@ class ImagePair:
     prediction_file: pathlib.Path = field(init=False)
     prediction_image: np.ndarray = field(init=False)
     image_id: str = field(init=False)
-    attribute_id: Optional[str] = field(default=None, init=False)
+    attribute_id: str | None = field(default=None, init=False)

     def parse_image_id(self) -> None:
-        image_id_match: Optional[Match[str]] = re.search(r'ISIC_[0-9]{7}', self.truth_file.stem)
+        image_id_match: Match[str] | None = re.search(r'ISIC_[0-9]{7}', self.truth_file.stem)
         if not image_id_match:
             raise Exception(f'Unknown ground truth file: {self.truth_file.name}.')
         self.image_id = image_id_match.group(0)

-        attribute_id_match: Optional[Match[str]] = re.search(
+        attribute_id_match: Match[str] | None = re.search(
             r'attribute_([a-z_]+)', self.truth_file.stem
         )
         if attribute_id_match:
@@ -93,7 +94,7 @@ def load_segmentation_image(image_path: pathlib.Path) -> np.ndarray:

 def assert_binary_image(image: np.ndarray, image_path: pathlib.Path) -> np.ndarray:
     """Ensure a NumPy array image is binary, correcting if possible."""
-    image_values: Set[int] = set(np.unique(image))
+    image_values: set[int] = set(np.unique(image))
     if image_values <= {0, 255}:
         # Expected values
         pass
@@ -112,7 +113,7 @@ def assert_binary_image(image: np.ndarray, image_path: pathlib.Path) -> np.ndarr

 def iter_image_pairs(
     truth_path: pathlib.Path, prediction_path: pathlib.Path
-) -> Generator[ImagePair, None, None]:
+) -> Generator[ImagePair]:
     for truth_file in sorted(truth_path.iterdir()):
         if truth_file.name in {'ATTRIBUTION.txt', 'LICENSE.txt'}:
             continue
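The return annotation shortens from `Generator[ImagePair, None, None]` to `Generator[ImagePair]`: as of Python 3.13, the send and return type parameters of `collections.abc.Generator` default to `None` (PEP 696), so the one-argument form means the same thing. A minimal sketch with a stand-in element type:

```python
from collections.abc import Generator


def count_up(limit: int) -> Generator[int]:
    # Equivalent to Generator[int, None, None] on Python 3.13+, where the send
    # and return type parameters default to None.
    for value in range(limit):
        yield value


for n in count_up(3):
    print(n)
```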

isic_challenge_scoring/metrics.py

Lines changed: 15 additions & 16 deletions
@@ -1,4 +1,3 @@
-from typing import Tuple
 import warnings

 import numpy as np
@@ -12,7 +11,7 @@ def _to_labels(probabilities: pd.DataFrame) -> pd.Series:

     # Find places where there are multiple maximum values
     max_probabilities = probabilities.max(axis='columns')
-    is_max: pd.DataFrame = probabilities.eq(max_probabilities, axis='rows')
+    is_max: pd.DataFrame = probabilities.eq(max_probabilities, axis='index')
     number_of_max: pd.Series = is_max.sum(axis='columns')
     multiple_max: pd.Series = number_of_max.gt(1)
     # Set those locations as an 'undecided' label
@@ -60,7 +59,7 @@ def _roc_curve(
     prediction_probabilities: pd.Series,
     weights: pd.Series,
     drop_intermediate: bool = True,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Call sklearn.metrics.roc_curve in a more performant way."""
     # This is much faster to compute if the zero-weighted probabilities are eliminated first
     nonzero_weights = weights.ne(0.0)
@@ -69,15 +68,18 @@ def _roc_curve(
     weights = weights[nonzero_weights]

     # An additional minor optimization
-    if weights.eq(1.0).all():
-        weights = None
+    sample_weight = None if weights.eq(1.0).all() else weights

     fp_rates, tp_rates, thresholds = sklearn.metrics.roc_curve(
         truth_probabilities,
         prediction_probabilities,
-        sample_weight=weights,
+        sample_weight=sample_weight,
         drop_intermediate=drop_intermediate,
     )
+    # This can contain infinity values so replace them with 1.0.
+    # https://github.com/scikit-learn/scikit-learn/pull/26194
+    thresholds = np.nan_to_num(thresholds, posinf=1.0)
+
     return fp_rates, tp_rates, thresholds


@@ -131,8 +133,8 @@ def binary_threshold_jaccard(cm: pd.Series, threshold: float = 0.65) -> float:

 def binary_dice(cm: pd.Series) -> float:
     if cm.at['TP'] + cm.at['FP'] + cm.at['FN'] == 0:
-        # Dice is ill-defined if all are negative and the prediction is perfect, but we'll
-        # just score that as a perfect answer
+        # Dice / F1 is ill-defined if all are negative and the prediction is perfect.
+        # See the rationale in "binary_ppv", which also applies here.
         return 1.0
     else:
         return (2 * cm.at['TP']) / ((2 * cm.at['TP']) + cm.at['FP'] + cm.at['FN'])
@@ -152,11 +154,8 @@ def binary_ppv(cm: pd.Series) -> float:

 def binary_npv(cm: pd.Series) -> float:
     if cm.at['TN'] + cm.at['FN'] == 0:
-        # NPV is ill-defined if all predictions are positive; we'll score it as perfect, which
-        # doesn't penalize the case where all are truly positive (a good predictor), and is sane
-        # for the case where some are truly negative (a limitation of this metric)
-        # Note, some other implementations would score the latter case as 0:
-        # https://github.com/dice-group/gerbil/wiki/Precision,-Recall-and-F1-measure
+        # NPV is ill-defined if all predictions are positive.
+        # See the rationale in "binary_ppv", which also applies here.
         return 1.0
     else:
         return cm.at['TN'] / (cm.at['TN'] + cm.at['FN'])
@@ -168,7 +167,7 @@ def auc(
     auc = sklearn.metrics.roc_auc_score(
         truth_probabilities, prediction_probabilities, sample_weight=weights
     )
-    return auc
+    return float(auc)


 def auc_above_sensitivity(
@@ -227,7 +226,7 @@ def auc_above_sensitivity(
     fp_rates_segment = np.insert(fp_rates_segment, 0, fp_rate_threshold)

     partial_auc = sklearn.metrics.auc(fp_rates_segment, tp_rates_segment)
-    return partial_auc
+    return float(partial_auc)


 def average_precision(
@@ -242,7 +241,7 @@ def average_precision(
     ap = sklearn.metrics.average_precision_score(
         truth_probabilities, prediction_probabilities, sample_weight=weights
     )
-    return ap
+    return float(ap)


 def roc(
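Two notes on the `_roc_curve` change: binding the optional weights to a separate `sample_weight` variable keeps `weights` typed as a `pd.Series` throughout, and the added `np.nan_to_num` call clamps the infinite threshold that recent scikit-learn releases prepend to the thresholds array (see the scikit-learn PR linked in the diff). A small sketch of the clamping step with made-up data:

```python
import numpy as np
import sklearn.metrics

truth = np.array([0, 0, 1, 1])
scores = np.array([0.1, 0.4, 0.35, 0.8])

fp_rates, tp_rates, thresholds = sklearn.metrics.roc_curve(truth, scores)

# Recent scikit-learn prepends an infinite threshold (the "predict nothing as
# positive" point); replace it with 1.0 so downstream code sees finite values.
thresholds = np.nan_to_num(thresholds, posinf=1.0)
print(thresholds)
```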

0 commit comments
