@@ -464,7 +464,8 @@ def topology(X, limit=None, reverse=True, neighborhood_generator=None):
464464
465465 # It is important to ensure unique values because the method sorts the values and only unique values are processed.
466466 # Without adjusting duplicated values, peaks with exactly the same height will be skipped.
467- X = _make_unique (X )
467+ # X = _make_unique(X)
468+ X = _make_unique_fast (X )
468469 # X = np.maximum(X + ((X > 0).astype(int) * np.random.random(X.shape) / 10), 0)
469470
470471 # Get indices ordered by value from high to low. As a tie-breaker, we use
@@ -711,21 +712,61 @@ def disable_tqdm():
711712 """Set the logger for verbosity messages."""
712713 return (True if (logger .getEffectiveLevel ()>= 30 ) else False )
713714
def _make_unique(arr: np.ndarray):
    """Return a copy of ``arr`` in which repeated values have been nudged apart.

    The elements are visited one by one. A value that has not been emitted yet
    is copied unchanged; a finite value that was already emitted is stepped
    towards negative infinity with ``np.nextafter`` until it no longer matches
    anything emitted so far. Non-finite values are copied as they are.
    """
    logger.debug('Making values unique')
    out = np.empty_like(arr)
    emitted = set()
    walker = np.nditer([arr, out], [], [['readonly'], ['writeonly', 'allocate']])
    with walker:
        for src, dst in walker:
            candidate = src.item()
            # Step down one representable value at a time until the slot is free.
            while candidate in emitted and np.isfinite(candidate):
                candidate = np.nextafter(candidate, -np.inf)
            dst[...] = candidate
            # Adding to a set is a no-op for values that are already present.
            emitted.add(candidate)
    return out
def _make_unique_fast(X: np.ndarray) -> np.ndarray:
    """Make all finite, non-zero values of ``X`` unique while preserving their order.

    Duplicated values are lowered by the smallest representable steps (ULPs) of
    the array's floating point type. The first occurrence (in C order) keeps
    its value, each further occurrence ends up one step lower than the
    previous one, and smaller neighbouring values are pushed down only as far
    as needed to stay strictly below. Adding ``index * eps`` instead does not
    work: the offset is rounded away for magnitudes >= 2, it can reorder
    values that are close together, and for float32 data it grows large
    enough to distort the signal.

    Parameters
    ----------
    X : np.ndarray
        Input values. Non-floating input is converted to float64; float16,
        float32 and float64 keep their precision; other floating types are
        converted to float64.

    Returns
    -------
    np.ndarray
        New array with the shape of ``X``. Exact zeros, NaN and +/-inf are
        returned unchanged; the input array is never modified.
    """
    X = np.asarray(X)
    float_dtype, int_dtype = np.float64, np.int64
    if np.issubdtype(X.dtype, np.floating):
        same_width = {2: (np.float16, np.int16), 4: (np.float32, np.int32)}
        float_dtype, int_dtype = same_width.get(X.dtype.itemsize, (np.float64, np.int64))

    # Always work on a native-endian, C-contiguous copy so that the flat view
    # below writes straight into the array that is returned.
    out = np.array(X, dtype=float_dtype, order='C')
    flat = out.reshape(-1)

    # Zeros are deliberately left alone, and NaN/inf cannot be nudged.
    selected = np.flatnonzero(np.isfinite(flat) & (flat != 0))
    if selected.size < 2:
        return out

    values = flat[selected]
    # High to low; the stable sort keeps duplicates in their original order.
    order = np.argsort(-values, kind='stable')

    # Map the floats to integers that sort exactly like the floats do:
    # positive floats already do, negative floats need their magnitude negated.
    sign_min = np.iinfo(int_dtype).min
    keys = values[order].view(int_dtype).astype(np.int64)
    negative = keys < 0
    keys[negative] = sign_min - keys[negative]

    # Enforce a strictly decreasing sequence with minimal changes:
    #   new[i] = min(old[i], new[i - 1] - 1) == min_{j <= i}(old[j] + j) - i
    ramp = np.arange(keys.size, dtype=np.int64)
    keys = np.minimum.accumulate(keys + ramp) - ramp

    # Never step past the most negative finite value into -inf/NaN patterns.
    largest = np.array([np.finfo(float_dtype).max], dtype=float_dtype)
    keys = np.maximum(keys, -int(largest.view(int_dtype)[0]))

    # Undo the key mapping and write the adjusted values back in place.
    negative = keys < 0
    keys[negative] = sign_min - keys[negative]
    flat[selected[order]] = keys.astype(int_dtype).view(float_dtype)
    return out
724+
725+ # def _make_unique_original(arr: np.ndarray):
726+ # """Method iterates through elements of the input array to ensure all values are unique.
727+ # Duplicate values are reduced by the smallest possible increment for the given data type.
728+ # """
729+ # logger.debug('Making values unique')
730+ # res = np.empty_like(arr)
731+ # it = np.nditer([arr, res], [], [['readonly'], ['writeonly', 'allocate']])
732+ # seen = set()
733+ # with it:
734+ # while not it.finished:
735+ # a = it[0].item()
736+ # while a in seen and np.isfinite(a):
737+ # a = np.nextafter(a, -np.inf)
738+ # it[1] = a
739+ # if a not in seen:
740+ # seen.add(a)
741+ # it.iternext()
742+ # return res
743+
744+ # def _make_unique(arr: np.ndarray) -> np.ndarray:
745+ # """Make all finite values in the array unique by perturbing duplicates slightly."""
746+ # # Convert to float64 if needed
747+ # if not np.issubdtype(arr.dtype, np.floating):
748+ # arr = arr.astype(np.float64)
749+ # dtype = arr.dtype
750+
751+ # arr_flat = arr.ravel()
752+ # res = arr_flat.copy()
753+ # is_finite = np.isfinite(res)
754+ # finite_vals = res[is_finite]
755+ # unique, counts = np.unique(finite_vals, return_counts=True)
756+ # dupes = unique[counts > 1]
757+ # if dupes.size == 0:
758+ # return arr.copy()
759+
760+ # seen = {}
761+ # mask = np.isin(finite_vals, dupes)
762+ # indices = np.where(mask)[0]
763+ # values = finite_vals[mask]
764+
765+ # for i, val in tqdm(zip(indices, values), total=len(values), disable=disable_tqdm(), desc=logger.info("Making values unique")):
766+ # count = seen.get(val, 0)
767+ # perturbed = np.nextafter(val, -np.inf, dtype=dtype) - count * np.finfo(dtype).eps
768+ # finite_vals[i] = perturbed
769+ # seen[val] = count + 1
770+
771+ # res[is_finite] = finite_vals
772+ # return res.reshape(arr.shape)
0 commit comments