Skip to content

Commit 4b8b550

Browse files
committed
update unique functionality
1 parent 27d7f00 commit 4b8b550

File tree

1 file changed

+60
-19
lines changed

1 file changed

+60
-19
lines changed

findpeaks/stats.py

Lines changed: 60 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,8 @@ def topology(X, limit=None, reverse=True, neighborhood_generator=None):
464464

465465
# It is important to ensure unique values because the method sorts the values and only unique values are processed.
466466
# Without adjusting duplicated values, peaks with exactly the same height will be skipped.
467-
X = _make_unique(X)
467+
# X = _make_unique(X)
468+
X = _make_unique_fast(X)
468469
# X = np.maximum(X + ((X > 0).astype(int) * np.random.random(X.shape) / 10), 0)
469470

470471
# Get indices ordered by value from high to low. As a tie-breaker, we use
@@ -711,21 +712,61 @@ def disable_tqdm():
711712
"""Set the logger for verbosity messages."""
712713
return (True if (logger.getEffectiveLevel()>=30) else False)
713714

714-
def _make_unique(arr: np.ndarray):
    """Return a copy of ``arr`` in which no finite value occurs more than once.

    Elements are visited in ``np.nditer`` order. The first occurrence of a
    value is kept unchanged; every later occurrence is stepped towards
    ``-inf`` with ``np.nextafter`` until it reaches a value that has not been
    emitted before. Non-finite values (NaN, +/-inf) are copied unchanged.

    Parameters
    ----------
    arr : np.ndarray
        Input array. It is not modified.

    Returns
    -------
    np.ndarray
        Array created with ``np.empty_like(arr)`` holding the adjusted values.
    """
    logger.debug('Making values unique')
    res = np.empty_like(arr)
    it = np.nditer([arr, res], [], [['readonly'], ['writeonly', 'allocate']])
    seen = set()
    # For each original value, the last replacement that was handed out.
    # Every value between the original and that replacement is already in
    # `seen`, so the next duplicate can resume from there instead of walking
    # the whole chain again. This gives the same result as restarting from the
    # original value, but is linear rather than quadratic in the number of
    # repeats of one value.
    resume_from = {}
    with it:
        while not it.finished:
            original = it[0].item()
            a = resume_from.get(original, original)
            # Stop on non-finite values: they cannot be decremented further.
            while a in seen and np.isfinite(a):
                a = np.nextafter(a, -np.inf)
            it[1] = a
            if a not in seen:
                seen.add(a)
            resume_from[original] = a
            it.iternext()
    return res
715+
def _make_unique_fast(X: np.ndarray) -> np.ndarray:
716+
"""Add small deterministic noise to make values unique, preserving relative order."""
717+
if not np.issubdtype(X.dtype, np.floating):
718+
X = X.astype(np.float64)
719+
eps = np.finfo(X.dtype).eps
720+
noise = np.arange(X.size, dtype=np.float64).reshape(X.shape) * eps
721+
722+
# Only apply noise where X is not exactly zero
723+
return np.where(X != 0, X + noise, X)
724+
725+
# def _make_unique_original(arr: np.ndarray):
726+
# """Method iterates through elements of the input array to ensure all values are unique.
727+
# Duplicate values are reduced by the smallest possible increment for the given data type.
728+
# """
729+
# logger.debug('Making values unique')
730+
# res = np.empty_like(arr)
731+
# it = np.nditer([arr, res], [], [['readonly'], ['writeonly', 'allocate']])
732+
# seen = set()
733+
# with it:
734+
# while not it.finished:
735+
# a = it[0].item()
736+
# while a in seen and np.isfinite(a):
737+
# a = np.nextafter(a, -np.inf)
738+
# it[1] = a
739+
# if a not in seen:
740+
# seen.add(a)
741+
# it.iternext()
742+
# return res
743+
744+
# def _make_unique(arr: np.ndarray) -> np.ndarray:
745+
# """Make all finite values in the array unique by perturbing duplicates slightly."""
746+
# # Convert to float64 if needed
747+
# if not np.issubdtype(arr.dtype, np.floating):
748+
# arr = arr.astype(np.float64)
749+
# dtype = arr.dtype
750+
751+
# arr_flat = arr.ravel()
752+
# res = arr_flat.copy()
753+
# is_finite = np.isfinite(res)
754+
# finite_vals = res[is_finite]
755+
# unique, counts = np.unique(finite_vals, return_counts=True)
756+
# dupes = unique[counts > 1]
757+
# if dupes.size == 0:
758+
# return arr.copy()
759+
760+
# seen = {}
761+
# mask = np.isin(finite_vals, dupes)
762+
# indices = np.where(mask)[0]
763+
# values = finite_vals[mask]
764+
765+
# for i, val in tqdm(zip(indices, values), total=len(values), disable=disable_tqdm(), desc=logger.info("Making values unique")):
766+
# count = seen.get(val, 0)
767+
# perturbed = np.nextafter(val, -np.inf, dtype=dtype) - count * np.finfo(dtype).eps
768+
# finite_vals[i] = perturbed
769+
# seen[val] = count + 1
770+
771+
# res[is_finite] = finite_vals
772+
# return res.reshape(arr.shape)

0 commit comments

Comments
 (0)