Skip to content

Commit 510aa22

Browse files
committed
Pivot: Display time variable in time format
1 parent 0d6da92 commit 510aa22

File tree

2 files changed

+92
-17
lines changed

2 files changed

+92
-17
lines changed

Orange/widgets/data/owpivot.py

Lines changed: 43 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ class Pivot:
6868
ContVarFunctions = (Sum, Mean, Min, Max, Mode, Median, Var)
6969
DiscVarFunctions = (Majority,)
7070
TimeVarFunctions = (Mean, Min, Max, Mode, Median)
71+
FloatFunctions = (Count, Count_defined, Sum, Var)
7172

7273
class Tables:
7374
table = None # type: Table
@@ -303,11 +304,23 @@ def map_values(index, _X):
303304
_X[:, index][_X[:, index] == value] = j
304305
return values
305306

307+
create_time_var = \
308+
isinstance(val_var, TimeVariable) and \
309+
all(fun in self.TimeVarFunctions for fun in agg_funs)
310+
create_cont_var = \
311+
not val_var or val_var.is_continuous and \
312+
(not isinstance(val_var, TimeVariable) or
313+
all(fun in self.FloatFunctions for fun in agg_funs))
314+
306315
vals = np.array(self._col_var.values)[self._col_var_groups.astype(int)]
307-
if not val_var or val_var.is_continuous:
308-
cv = ContinuousVariable
309-
attrs = [[cv(f"{v}", 1) for v in vals]] * 2
310-
attrs.extend([[cv("Total", 1)]] * 2)
316+
if create_time_var:
317+
kwargs = {"have_date": val_var.have_date,
318+
"have_time": val_var.have_time}
319+
attrs = [[TimeVariable(f"{v}", **kwargs) for v in vals]] * 2
320+
attrs.extend([[TimeVariable("Total", **kwargs)]] * 2)
321+
elif create_cont_var:
322+
attrs = [[ContinuousVariable(f"{v}", 1) for v in vals]] * 2
323+
attrs.extend([[ContinuousVariable("Total", 1)]] * 2)
311324
else:
312325
attrs = []
313326
for x in (X, X_h):
@@ -354,15 +367,19 @@ def __get_pivot_tab_x(self, val_var, agg_funs):
354367
gt = self._group_tables
355368
n_fun = len(agg_funs)
356369
n_rows, n_cols = len(self._row_var_groups), len(self._col_var_groups)
357-
kwargs = {"fill_value": np.nan, "dtype": float} \
358-
if not val_var or val_var.is_continuous \
370+
is_float_type = not val_var or val_var.is_continuous
371+
if isinstance(val_var, TimeVariable):
372+
is_float_type = \
373+
all(fun in self.TimeVarFunctions for fun in agg_funs) or \
374+
all(fun in self.FloatFunctions for fun in agg_funs)
375+
kwargs = {"fill_value": np.nan, "dtype": float} if is_float_type \
359376
else {"fill_value": "", "dtype": object}
360377
X = np.full((n_rows * n_fun, 2 + n_cols), **kwargs)
361378
X_h = np.full((n_fun, 2 + n_cols), **kwargs)
362379
X_v = np.full((n_rows * n_fun, 1), **kwargs)
363380
X_t = np.full((n_fun, 1), **kwargs)
364381
for i, fun in enumerate(agg_funs):
365-
args = (val_var, fun)
382+
args = (val_var, fun, is_float_type)
366383
X[i::n_fun, 2:] = self.__rows_for_function(n_rows, n_cols, *args)
367384
X[i::n_fun, :2] = np.array([[row_val, agg_funs.index(fun)] for
368385
row_val in self._row_var_groups])
@@ -372,13 +389,14 @@ def __get_pivot_tab_x(self, val_var, agg_funs):
372389
X_t[i] = self.__total_for_function(gt.total, *args)
373390
return X, X_h, X_v, X_t
374391

def __total_for_function(self, group_tab, val_var, fun, is_float_type):
    """Return the column of ``group_tab`` that holds the totals for ``fun``.

    Args:
        group_tab: table whose ``X`` matrix contains the aggregated values.
        val_var: value variable; used as the key for variable-dependent
            aggregations and forwarded to ``__check_continuous``.
        fun: aggregation function whose column is requested.
        is_float_type: forwarded unchanged to ``__check_continuous``.

    Returns:
        The selected column of ``group_tab.X`` as returned by
        ``__check_continuous``.
    """
    # Aggregations that do not depend on the value variable are looked up
    # first; anything else is registered per (function, variable).
    # NOTE(review): the truthiness test also treats a stored index of 0 as
    # "missing" -- presumably index 0 never occurs here; confirm.
    col_idx = self._indepen_agg_done.get(fun, None)
    if not col_idx:
        col_idx = self._depen_agg_done[fun][val_var]

    # The stored index is shifted one to the left unless the grouping is
    # done by a single variable.
    if not self.single_var_grouping:
        col_idx -= 1

    totals = group_tab.X[:, col_idx]
    return self.__check_continuous(val_var, totals, fun, is_float_type)
380398

381-
def __rows_for_function(self, n_rows, n_cols, val_var, fun):
399+
def __rows_for_function(self, n_rows, n_cols, val_var, fun, is_float_type):
382400
ref = self._indepen_agg_done.get(fun, None) \
383401
or self._depen_agg_done[fun][val_var]
384402
column = self._group_tables.table.X[:, ref]
@@ -387,14 +405,23 @@ def __rows_for_function(self, n_rows, n_cols, val_var, fun):
387405
rows[np.diag_indices_from(rows)] = column
388406
else:
389407
rows = column.reshape(n_rows, n_cols)
390-
return self.__check_continuous(val_var, rows, fun)
408+
return self.__check_continuous(val_var, rows, fun, is_float_type)
391409

def __check_continuous(self, val_var, column, fun, is_float_type):
    """Convert an aggregated column to display strings where required.

    * Non-continuous ``val_var``: the column is converted to strings and,
      when ``fun`` is one of ``DiscVarFunctions``, every float code
      (``"0.0"``, ``"1.0"``, ...) is replaced by the matching entry of
      ``val_var.values``.
    * ``TimeVariable`` with ``is_float_type`` false: the column is
      converted to strings and, when ``fun`` is one of
      ``TimeVarFunctions``, every non-NaN entry is replaced by
      ``val_var.repr_val(value)``.
    * Otherwise the column is returned unchanged.

    Args:
        val_var: value variable of the pivot, or a falsy value if none.
        column: NumPy array (any shape) of aggregated values.
        fun: aggregation function that produced ``column``.
        is_float_type: if true, a time-valued column is kept numeric.

    Returns:
        ``column`` itself, or a new string array of the same shape.
    """
    if val_var and not val_var.is_continuous:
        codes = column.astype(str)
        if fun not in self.DiscVarFunctions:
            return codes
        # Translate all codes in one pass through a lookup table.  Writing
        # labels into the fixed-width array returned by ``astype(str)``
        # would silently truncate labels longer than its item size, and
        # replacing code after code in place could re-map a label that
        # itself looks like a float code (e.g. a value named "1.0").
        labels = {str(float(j)): val
                  for j, val in enumerate(val_var.values)}
        mapped = [labels.get(code, code) for code in codes.ravel().tolist()]
        # Building the array from Python strings sizes the dtype to fit.
        return np.array(mapped, dtype=str).reshape(codes.shape)

    if isinstance(val_var, TimeVariable) and not is_float_type:
        texts = column.astype(str)
        if fun not in self.TimeVarFunctions:
            return texts
        # Missing values keep their plain string form ("nan"); everything
        # else is rendered with the variable's own time format.
        formatted = [
            text if np.isnan(value) else val_var.repr_val(value)
            for value, text in zip(column.ravel(), texts.ravel().tolist())
        ]
        return np.array(formatted, dtype=str).reshape(column.shape)

    return column
399426

400427
@staticmethod
@@ -618,10 +645,8 @@ def __set_vertical_headers(self, table):
618645
self.table_model.setItem(i + 1, 1, item)
619646

620647
def _set_values(self, table):
621-
attrs = table.domain.attributes
622648
for i, j in product(range(len(table)), range(len(table[0]))):
623-
# data is read faster when reading directly from table.X
624-
value = table.X[i, j] if attrs[j].is_continuous else table[i, j]
649+
value = table[i, j]
625650
item = self._create_value_item(str(value))
626651
self.table_model.setItem(i + self._n_leading_rows,
627652
j + self._n_leading_cols, item)
@@ -982,9 +1007,10 @@ def get_filtered_data(self):
9821007
elif isinstance(at, ContinuousVariable):
9831008
f.append(FilterContinuous(at, FilterContinuous.Equal, val))
9841009
cond.append(Values(f))
985-
return Values([f for f in cond], conjunction=False)(self.data)
1010+
return Values(cond, conjunction=False)(self.data)
9861011

@staticmethod
def sizeHint():
    """Return the fixed size hint ``QSize(640, 525)``."""
    hint_width, hint_height = 640, 525
    return QSize(hint_width, hint_height)
9891015

9901016
def send_report(self):

Orange/widgets/data/tests/test_owpivot.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -649,6 +649,55 @@ def test_pivot_disc_val_var(self):
649649
[8, 1, np.nan, np.nan]])
650650
self.assert_table_equal(pivot_tab, Table(Domain(atts), X))
651651

def test_pivot_time_val_var(self):
    """Pivot a table whose value variable is a time variable.

    Expected column types, as encoded in the cases below:
    * only Min / Max: time variables;
    * only Count defined / Sum: continuous variables;
    * a mix of both groups: discrete variables holding the string
      representations of the aggregated values.
    """
    nan = np.nan
    domain = Domain([Dv("d1", ("a", "b")), Dv("d2", ("c", "d")),
                     Tv("t1", have_date=1)])
    table = Table(domain, np.array([[0, 1, 1e9],
                                    [0, 0, 1e8],
                                    [1, 0, 2e8],
                                    [1, 1, nan]]))
    row_var, col_var, val_var = domain[0], domain[1], domain[2]

    def check(agg_funs, agg_names, value_attrs, expected_rows):
        # Build the pivot and compare it with the expected table.
        pivot = Pivot(table, agg_funs, row_var, col_var, val_var)
        atts = (row_var, Dv("Aggregate", agg_names)) + tuple(value_attrs)
        expected = Table(Domain(atts), np.array(expected_rows))
        self.assert_table_equal(pivot.pivot_table, expected)

    # Min
    check([Pivot.Min], ["Min"],
          (Tv("c", have_date=1), Tv("d", have_date=1)),
          [[0, 0, 1e8, 1e9],
           [1, 0, 2e8, nan]])

    # Min, Max
    check([Pivot.Min, Pivot.Max], ["Min", "Max"],
          (Tv("c", have_date=1), Tv("d", have_date=1)),
          [[0, 0, 1e8, 1e9],
           [0, 1, 1e8, 1e9],
           [1, 0, 2e8, nan],
           [1, 1, 2e8, nan]])

    # Count defined, Sum
    check([Pivot.Count_defined, Pivot.Sum], ["Count defined", "Sum"],
          (Cv("c"), Cv("d")),
          [[0, 0, 1, 1],
           [0, 1, 1e8, 1e9],
           [1, 0, 1, 0],
           [1, 1, 2e8, 0]])

    # Count defined, Max
    check([Pivot.Count_defined, Pivot.Max], ["Count defined", "Max"],
          (Dv("c", ["1.0", "1973-03-03", "1976-05-03"]),
           Dv("d", ["0.0", "1.0", "2001-09-09"])),
          [[0, 0, 0, 1],
           [0, 1, 1, 2],
           [1, 0, 0, 0],
           [1, 1, 2, nan]])
700+
652701
def test_pivot_attr_combinations(self):
653702
domain = self.table1.domain
654703
for var1, var2, var3 in ((domain[1], domain[3], domain[5]), # d d d

0 commit comments

Comments
 (0)