Skip to content

Commit ee6806d

Browse files
committed
[FIX] Speed-up slow table_to_frame
1 parent 98af325 commit ee6806d

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

Orange/data/pandas_compat.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,7 @@ def _column_to_series(col, vals):
368368
elif col.is_continuous:
369369
dt = float
370370
# np.nan are not compatible with int column
371-
nan_values_in_column = [t for t in vals if np.isnan(t)]
372-
if col.number_of_decimals == 0 and len(nan_values_in_column) == 0:
371+
if col.number_of_decimals == 0 and not np.any(np.isnan(vals)):
373372
dt = int
374373
result = (col.name, pd.Series(vals).astype(dt))
375374
elif col.is_string:

Orange/data/tests/test_pandas.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from Orange.data import ContinuousVariable, DiscreteVariable, TimeVariable, Table, Domain, \
99
StringVariable
10-
from Orange.data.pandas_compat import OrangeDataFrame
10+
from Orange.data.pandas_compat import OrangeDataFrame, table_to_frame
1111

1212
try:
1313
import pandas as pd
@@ -70,7 +70,6 @@ def test_table_from_frame(self):
7070
self.assertEqual(types, [DiscreteVariable, ContinuousVariable, TimeVariable])
7171

7272
def test_table_to_frame(self):
73-
from Orange.data.pandas_compat import table_to_frame
7473
table = Table("iris")
7574
df = table_to_frame(table)
7675
table_column_names = [var.name for var in table.domain.variables]
@@ -81,6 +80,24 @@ def test_table_to_frame(self):
8180
self.assertEqual(list(df['sepal length'])[0:4], [5.1, 4.9, 4.7, 4.6])
8281
self.assertEqual(list(df['iris'])[0:2], ['Iris-setosa', 'Iris-setosa'])
8382

83+
def test_table_to_frame_nans(self):
84+
domain = Domain(
85+
[ContinuousVariable("a", number_of_decimals=0), ContinuousVariable("b")]
86+
)
87+
table = Table(
88+
domain, np.column_stack((np.ones(10), np.hstack((np.ones(9), [np.nan]))))
89+
)
90+
91+
df = table_to_frame(table)
92+
table_column_names = [var.name for var in table.domain.variables]
93+
frame_column_names = df.columns
94+
95+
self.assertEqual(sorted(table_column_names), sorted(frame_column_names))
96+
self.assertEqual(df["a"].dtype, int)
97+
self.assertEqual(df["b"].dtype, float)
98+
self.assertEqual([1, 1, 1], list(df["a"].iloc[-3:]))
99+
self.assertTrue(np.isnan(df["b"].iloc[-1]))
100+
84101
def test_table_to_frame_metas(self):
85102
from Orange.data.pandas_compat import table_to_frame
86103

0 commit comments

Comments
 (0)