Skip to content

Commit 8e2e726

Browse files
committed
Minor edits
1 parent 9a274d5 commit 8e2e726

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

src/midst_toolkit/evaluation/privacy/batched_eir.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,16 @@
1010

1111

1212
def _column_entropy(labels: list | np.ndarray) -> np.number:
13-
"""Compute the entropy of a single column."""
13+
"""
14+
Compute the entropy of a single column of labels.
15+
16+
Args:
17+
labels: One-dimensional collection of labels. Values are rounded
18+
before computing entropy.
19+
20+
Returns:
21+
The entropy of the distribution of rounded labels.
22+
"""
1423
_, counts = np.unique(np.round(labels), return_counts=True)
1524
return entropy(counts)
1625

@@ -45,10 +54,10 @@ def batched_reference_knn(
4554
Returns:
4655
Array of nearest neighbor distance per query row after considering all reference batches.
4756
"""
48-
n_query = len(query_df)
57+
query_df_size = len(query_df)
4958

5059
# Initializing a list of best distances with np.inf so they can be replaced with the actual best distances later.
51-
nearest_neighbor_distance = np.full(n_query, np.inf, dtype=float)
60+
nearest_neighbor_distance = np.full(query_df_size, np.inf, dtype=float)
5261

5362
iterator: Iterable[int]
5463
if show_progress:
@@ -93,7 +102,7 @@ def type(self) -> str:
93102
"""
94103
return "privacy"
95104

96-
def evaluate(self) -> dict:
105+
def evaluate(self) -> dict[str, float]:
97106
"""
98107
Compute epsilon-identifiability risk and privacy loss.
99108

0 commit comments

Comments
 (0)