@@ -851,6 +851,35 @@ def _estimator_action(self, estimator, X_train, y_train, X_test,
851851 # -------------------------------------------------------------------------
852852 # -------------------------------------------------------------------------
853853
def _random_choice(self, n, size, bound=2 ** 30):
    """
    Memory efficient (but slower) version of
    ``np.random.choice(n, size, replace=False)``.

    Draws one random integer at a time and rejects duplicates, so no
    array of length ``n`` is ever allocated.

    Parameters:
    ===========
    n : int
        Upper value for range to choose from: [0, n).
        This parameter is bounded (see bound): values are actually
        drawn from [0, min(bound, n)).
    size : int
        Number of distinct values to choose.
    bound : int
        Upper random int for backward compatibility
        with some older numpy versions.

    Returns:
    ========
    ids : 1d numpy array of shape (size, ) dtype=np.int32
        Distinct values in the order they were drawn.

    Raises:
    =======
    ValueError
        If ``size`` is larger than ``min(bound, n)``. There are not
        enough distinct values to draw in that case, and the rejection
        loop below would otherwise never terminate.
    """
    # Loop-invariant effective upper limit of the sampling range.
    upper = min(bound, n)
    if size > upper:
        raise ValueError(
            'Cannot choose %d distinct values from range [0, %d)'
            % (size, upper))

    ids = []      # keeps the order in which values were drawn
    seen = set()  # O(1) duplicate check instead of scanning the list
    while len(ids) < size:
        rnd = np.random.randint(upper)
        if rnd not in seen:
            seen.add(rnd)
            ids.append(rnd)
    return np.array(ids, dtype=np.int32)
879+
880+ # -------------------------------------------------------------------------
881+ # -------------------------------------------------------------------------
882+
854883 def _get_footprint (self , X , n_items = 1000 ):
855884 """Selects ``n_items`` random elements from 2d numpy array or
856885 sparse matrix (or all elements if their number is less or equal
@@ -861,7 +890,11 @@ def _get_footprint(self, X, n_items=1000):
861890 r , c = X .shape
862891 n = r * c
863892 # np.random.seed(0) # for development
864- ids = np .random .choice (n , min (n_items , n ), replace = False )
893+
894+ # OOM with large arrays (see #29)
895+ # ids = np.random.choice(n, min(n_items, n), replace=False)
896+
897+ ids = self ._random_choice (n , min (n_items , n ))
865898
866899 for i in ids :
867900 row = i // c
0 commit comments