Skip to content

Commit c09db85

Browse files
authored
#36 fix. attempt 5
1 parent 9456422 commit c09db85

File tree

1 file changed

+18
-18
lines changed

1 file changed

+18
-18
lines changed

src/simdec/decomposition.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -162,41 +162,41 @@ def statistic_(inputs):
162162
bins.append(inputs)
163163
return statistic_method(inputs)
164164

165-
# If this input has only a few unique numeric values (categorical-like),
166-
# build bin edges around unique values so we don't get empty states.
165+
# make bins with an equal number of samples for a given dimension
166+
# sort, then split into n states
167167
sorted_inputs = np.sort(inputs, axis=0)
168168
bin_edges = []
169-
169+
170170
for i, states_ in enumerate(states):
171171
col = inputs[:, i]
172172
uniq = np.unique(col)
173-
174-
# Categorical-like numeric inputs
173+
174+
# Categorical-like numeric inputs: if we have few unique numeric values,
175+
# build edges around the unique values so we don't create empty states.
176+
# We only apply this when the requested number of states matches the
177+
# number of categories (uniq.size).
175178
if uniq.size <= 5 and states_ == uniq.size:
176179
uniq = np.sort(uniq).astype(float)
177-
180+
178181
if uniq.size == 1:
179-
bin_edges_ = np.array(
180-
[uniq[0] - 0.5, uniq[0] + 0.5], dtype=float
181-
)
182+
edges = np.array([uniq[0] - 0.5, uniq[0] + 0.5], dtype=float)
182183
else:
183184
gaps = np.diff(uniq)
184185
margin = 0.1 * np.min(gaps)
185-
186186
edges = np.concatenate(
187187
([uniq[0] - margin], uniq[:-1] + margin, [uniq[-1] + margin])
188188
).astype(float)
189-
189+
190190
bin_edges.append(edges)
191191
continue
192192

193-
# Default: equal-number-of-samples bins
194-
splits = np.array_split(sorted_inputs[:, i], states_)
195-
bin_edges_ = [splits_[0] for splits_ in splits]
196-
bin_edges_.append(splits[-1][-1]) # last point to close the edges
197-
bin_edges_ = np.array(bin_edges_, dtype=float)
198-
bin_edges_ += 1e-10 * np.linspace(0, 1, len(bin_edges_))
199-
bin_edges.append(bin_edges_)
193+
# Default: equal-number-of-samples bins
194+
splits = np.array_split(sorted_inputs[:, i], states_)
195+
edges = [s[0] for s in splits]
196+
edges.append(splits[-1][-1]) # last point to close the edges
197+
edges = np.array(edges, dtype=float)
198+
edges += 1e-10 * np.linspace(0, 1, len(edges))
199+
bin_edges.append(edges)
200200

201201

202202
res = stats.binned_statistic_dd(

0 commit comments

Comments
 (0)