Skip to content

Commit 3daedd2

Browse files
committed
Copy sparse attribute when creating new variables
1 parent bc89349 commit 3daedd2

File tree

5 files changed

+27
-13
lines changed

5 files changed

+27
-13
lines changed

Orange/preprocess/continuize.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,14 +25,19 @@ def transform_discrete(var):
2525
return []
2626
if treat == Continuize.AsOrdinal:
2727
new_var = ContinuousVariable(var.name,
28-
compute_value=Identity(var))
28+
compute_value=Identity(var),
29+
sparse=var.sparse)
2930
return [new_var]
3031
if treat == Continuize.AsNormalizedOrdinal:
3132
n_values = max(1, len(var.values))
3233
if self.zero_based:
33-
return [ContinuousVariable(var.name, compute_value=Normalizer(var, 0, 1 / (n_values - 1)))]
34+
return [ContinuousVariable(var.name,
35+
compute_value=Normalizer(var, 0, 1 / (n_values - 1)),
36+
sparse=var.sparse)]
3437
else:
35-
return [ContinuousVariable(var.name, compute_value=Normalizer(var, (n_values - 1) / 2, 2 / (n_values - 1)))]
38+
return [ContinuousVariable(var.name,
39+
compute_value=Normalizer(var, (n_values - 1) / 2, 2 / (n_values - 1)),
40+
sparse=var.sparse)]
3641

3742
new_vars = []
3843
if treat == Continuize.Indicators:
@@ -48,7 +53,8 @@ def transform_discrete(var):
4853
continue
4954
new_var = ContinuousVariable(
5055
"{}={}".format(var.name, val),
51-
compute_value=ind_class(var, i))
56+
compute_value=ind_class(var, i),
57+
sparse=var.sparse)
5258
new_vars.append(new_var)
5359
return new_vars
5460

Orange/preprocess/discretize.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,8 @@ def create_discretized_var(cls, var, points):
7373
to_sql = SingleValueSql(values[0])
7474

7575
dvar = DiscreteVariable(name=var.name, values=values,
76-
compute_value=cls(var, points))
76+
compute_value=cls(var, points),
77+
sparse=var.sparse)
7778
dvar.source_variable = var
7879
dvar.to_sql = to_sql
7980
return dvar

Orange/preprocess/impute.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -246,7 +246,8 @@ def __call__(self, data, variable):
246246
compute_value=Lookup(
247247
variable,
248248
np.arange(len(variable.values), dtype=int),
249-
unknown=len(variable.values))
249+
unknown=len(variable.values)),
250+
sparse=variable.sparse,
250251
)
251252
return var
252253

@@ -255,7 +256,9 @@ def __call__(self, data, variable):
255256
indicator_var = Orange.data.DiscreteVariable(
256257
fmt.format(var=variable),
257258
values=("undef", "def"),
258-
compute_value=IsDefined(variable))
259+
compute_value=IsDefined(variable),
260+
sparse=variable.sparse,
261+
)
259262
stats = basic_stats.BasicStats(data, variable)
260263
return (variable.copy(compute_value=ReplaceUnknowns(variable,
261264
stats.mean)),

Orange/preprocess/normalize.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,14 +41,14 @@ def normalize_by_sd(self, dist, var):
4141
avg, sd = (dist.mean(), dist.standard_deviation()) if dist.size else (0, 1)
4242
if sd == 0:
4343
sd = 1
44-
return ContinuousVariable(var.name, compute_value=Norm(var, avg, 1 / sd))
44+
return ContinuousVariable(var.name, compute_value=Norm(var, avg, 1 / sd), sparse=var.sparse)
4545

4646
def normalize_by_span(self, dist, var):
4747
dma, dmi = dist.max(), dist.min()
4848
diff = dma - dmi
4949
if diff < 1e-15:
5050
diff = 1
5151
if self.zero_based:
52-
return ContinuousVariable(var.name, compute_value=Norm(var, dmi, 1 / diff))
52+
return ContinuousVariable(var.name, compute_value=Norm(var, dmi, 1 / diff), sparse=var.sparse)
5353
else:
54-
return ContinuousVariable(var.name, compute_value=Norm(var, (dma + dmi) / 2, 2 / diff))
54+
return ContinuousVariable(var.name, compute_value=Norm(var, (dma + dmi) / 2, 2 / diff), sparse=var.sparse)

Orange/preprocess/remove.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,9 @@ def merge_transforms(exp):
170170
exp.var.name,
171171
values=exp.var.values,
172172
ordered=exp.var.ordered,
173-
compute_value=merge_lookup(A, B))
173+
compute_value=merge_lookup(A, B),
174+
sparse=exp.var.sparse,
175+
)
174176
assert isinstance(prev.sub, Var)
175177
return Transformed(prev.sub, new_var)
176178
else:
@@ -253,7 +255,8 @@ def remove_unused_values(var, data):
253255
return DiscreteVariable("{}".format(var.name),
254256
values=used_values,
255257
base_value=base_value,
256-
compute_value=Lookup(var, translation_table)
258+
compute_value=Lookup(var, translation_table),
259+
sparse=var.sparse,
257260
)
258261

259262

@@ -268,7 +271,8 @@ def sort_var_values(var):
268271
)
269272

270273
return DiscreteVariable(var.name, values=newvalues,
271-
compute_value=Lookup(var, translation_table))
274+
compute_value=Lookup(var, translation_table),
275+
sparse=var.sparse)
272276

273277

274278
def merge_lookup(A, B):

0 commit comments

Comments
 (0)