Skip to content

Commit 4de1ba5

Browse files
authored
Merge pull request #164 from helske/master
Improved numerical stability with exp
2 parents 0cd4685 + eefcb5b commit 4de1ba5

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

hdbscan/_prediction_utils.pyx

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,9 @@ cpdef np.ndarray[np.float64_t, ndim=1] dist_membership_vector(
7777

7878
if softmax:
7979
for i in range(vector.shape[0]):
80-
if vector[i] != 0:
81-
result[i] = np.exp(1.0 / vector[i])
82-
else:
83-
result[i] = DBL_MAX / vector.shape[0]
84-
sum += result[i]
80+
result[i] = 1.0 / vector[i]
81+
result = np.exp(result - np.nanmax(result))
82+
sum = np.sum(result)
8583

8684
else:
8785
for i in range(vector.shape[0]):
@@ -225,8 +223,10 @@ cpdef np.ndarray[np.float64_t, ndim=1] outlier_membership_vector(neighbor,
225223
if softmax:
226224
result = per_cluster_scores(neighbor, lambda_, clusters, tree,
227225
max_lambda_dict, cluster_tree)
228-
result = np.exp(result)
229-
result[~np.isfinite(result)] = np.finfo(np.double).max
226+
# Shift by the maximum for numerical stability; mathematically equivalent
227+
# to the old version because the result is normalized by its sum below.
228+
result = np.exp(result - np.nanmax(result))
229+
#result[~np.isfinite(result)] = np.finfo(np.double).max
230230
else:
231231
result = per_cluster_scores(neighbor, lambda_, clusters, tree,
232232
max_lambda_dict, cluster_tree)
@@ -310,8 +310,10 @@ cpdef np.ndarray[np.float64_t, ndim=2] all_points_outlier_membership_vector(
310310
max_lambda_dict,
311311
cluster_tree)
312312
if softmax:
313-
result = np.exp(per_cluster_scores)
314-
result[~np.isfinite(result)] = np.finfo(np.double).max
313+
# Shift by the maximum for numerical stability; mathematically equivalent
314+
# to the old version because the result is normalized by its sum below.
315+
result = np.exp(per_cluster_scores - np.nanmax(per_cluster_scores))
316+
#result[~np.isfinite(result)] = np.finfo(np.double).max
315317
else:
316318
result = per_cluster_scores
317319

@@ -354,4 +356,3 @@ cpdef all_points_prob_in_some_cluster(
354356
result[point] = (heights.max() / max_lambda)
355357

356358
return result
357-

0 commit comments

Comments
 (0)