Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions Orange/data/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,17 @@ def _is_datetime(s):
np.column_stack(M) if M else None)


def table_to_frame(tab):
def table_to_frame(tab, include_metas=False):
"""
Convert Orange.data.Table to pandas.DataFrame

Parameters
----------
tab : Table

include_metas : bool, default=False
If True, also include the table's meta columns (``tab.domain.metas``) in the dataframe.

Returns
-------
pandas.DataFrame
Expand Down Expand Up @@ -122,6 +125,9 @@ def _columns_to_series(cols, vals):
if domain.metas:
metas = _columns_to_series(domain.metas, tab.metas)
all_series = dict(x + y + metas)
original_column_order = [var.name for var in tab.domain.variables]
all_vars = tab.domain.variables
if include_metas:
all_vars += tab.domain.metas
original_column_order = [var.name for var in all_vars]
unsorted_columns_df = pd.DataFrame(all_series)
return unsorted_columns_df[original_column_order]
20 changes: 20 additions & 0 deletions Orange/data/tests/test_pandas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=import-outside-toplevel
import unittest
import numpy as np
from Orange.data import ContinuousVariable, DiscreteVariable, TimeVariable, Table
Expand All @@ -7,6 +8,7 @@
except ImportError:
pd = None


@unittest.skipIf(pd is None, "Missing package 'pandas'")
class TestPandasCompat(unittest.TestCase):
def test_table_from_frame(self):
Expand Down Expand Up @@ -73,6 +75,20 @@ def test_table_to_frame(self):
self.assertEqual(list(df['sepal length'])[0:4], [5.1, 4.9, 4.7, 4.6])
self.assertEqual(list(df['iris'])[0:2], ['Iris-setosa', 'Iris-setosa'])

def test_table_to_frame_metas(self):
    """Check the dataframe column order with and without meta columns."""
    from Orange.data.pandas_compat import table_to_frame

    zoo = Table("zoo")
    dom = zoo.domain

    def names(variables):
        # Expected column index: variable names in the given order.
        return pd.Index([v.name for v in variables])

    # Default call: columns are compared against the domain variables only.
    frame = table_to_frame(zoo)
    pd.testing.assert_index_equal(frame.columns, names(dom.variables))

    # include_metas=True: columns are compared against variables + metas.
    frame = table_to_frame(zoo, include_metas=True)
    pd.testing.assert_index_equal(
        frame.columns, names(dom.variables + dom.metas))

@unittest.skip("Convert all Orange demo dataset. It takes about 5s which is way to slow")
def test_table_to_frame_on_all_orange_dataset(self):
from os import listdir
Expand All @@ -96,3 +112,7 @@ def _get_orange_demo_datasets():
self.assertEqual(type(df), pd.DataFrame, assert_message)
self.assertEqual(len(df), len(table), assert_message)
self.assertEqual(len(df.columns), len(table.domain), assert_message)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()