@@ -162,41 +162,41 @@ def statistic_(inputs):
162162 bins .append (inputs )
163163 return statistic_method (inputs )
164164
165- # If this input has only a few unique numeric values (categorical-like),
166- # build bin edges around unique values so we don't get empty states.
165+ # make bins with equal number of samples for a given dimension
166+ # sort and then split in n-state
167167 sorted_inputs = np .sort (inputs , axis = 0 )
168168 bin_edges = []
169-
169+
170170 for i , states_ in enumerate (states ):
171171 col = inputs [:, i ]
172172 uniq = np .unique (col )
173-
174- # Categorical-like numeric inputs
173+
174+ # Categorical-like numeric inputs: if we have few unique numeric values,
175+ # build edges around the unique values so we don't create empty states.
176+ # We only apply this when the requested number of states matches the
177+ # number of categories (uniq.size).
175178 if uniq .size <= 5 and states_ == uniq .size :
176179 uniq = np .sort (uniq ).astype (float )
177-
180+
178181 if uniq .size == 1 :
179- bin_edges_ = np .array (
180- [uniq [0 ] - 0.5 , uniq [0 ] + 0.5 ], dtype = float
181- )
182+ edges = np .array ([uniq [0 ] - 0.5 , uniq [0 ] + 0.5 ], dtype = float )
182183 else :
183184 gaps = np .diff (uniq )
184185 margin = 0.1 * np .min (gaps )
185-
186186 edges = np .concatenate (
187187 ([uniq [0 ] - margin ], uniq [:- 1 ] + margin , [uniq [- 1 ] + margin ])
188188 ).astype (float )
189-
189+
190190 bin_edges .append (edges )
191191 continue
192192
193- # Default: equal-number-of-samples bins
194- splits = np .array_split (sorted_inputs [:, i ], states_ )
195- bin_edges_ = [splits_ [0 ] for splits_ in splits ]
196- bin_edges_ .append (splits [- 1 ][- 1 ]) # last point to close the edges
197- bin_edges_ = np .array (bin_edges_ , dtype = float )
198- bin_edges_ += 1e-10 * np .linspace (0 , 1 , len (bin_edges_ ))
199- bin_edges .append (bin_edges_ )
193+ # Default: equal-number-of-samples bins
194+ splits = np .array_split (sorted_inputs [:, i ], states_ )
195+ edges = [s [0 ] for s in splits ]
196+ edges .append (splits [- 1 ][- 1 ]) # last point to close the edges
197+ edges = np .array (edges , dtype = float )
198+ edges += 1e-10 * np .linspace (0 , 1 , len (edges ))
199+ bin_edges .append (edges )
200200
201201
202202 res = stats .binned_statistic_dd (
0 commit comments