Skip to content

Commit e791c5b

Browse files
committed
deprecates load_data_frame
1 parent ca41084 commit e791c5b

3 files changed

Lines changed: 0 additions & 176 deletions

File tree

gokart/task.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -323,32 +323,6 @@ def _load(targets):
323323

324324
return _load(self._get_input_targets(target))
325325

326-
@deprecated("""This function is deprecated. use `load` instead.
327-
If you want to specify `required_columns` and `drop_columns`, please extract the columns after loading. ex: `load()[['colA', 'colB']]`
328-
""")
329-
def load_data_frame(
330-
self, target: Union[None, str, TargetOnKart] = None, required_columns: Optional[Set[str]] = None, drop_columns: bool = False
331-
) -> pd.DataFrame:
332-
def _flatten_recursively(dfs):
333-
if isinstance(dfs, list):
334-
return pd.concat([_flatten_recursively(df) for df in dfs])
335-
else:
336-
return dfs
337-
338-
dfs = self.load(target=target)
339-
if isinstance(dfs, dict) and len(dfs) == 1:
340-
dfs = list(dfs.values())[0]
341-
342-
data = _flatten_recursively(dfs)
343-
344-
required_columns = required_columns or set()
345-
if data.empty and len(data.index) == 0 and len(required_columns - set(data.columns)) > 0:
346-
return pd.DataFrame(columns=list(required_columns))
347-
assert required_columns.issubset(set(data.columns)), f'data must have columns {required_columns}, but actually have only {data.columns}.'
348-
if drop_columns:
349-
data = data[list(required_columns)]
350-
return data
351-
352326
@overload
353327
def dump(self, obj: T, target: None = None) -> None: ...
354328

test/test_task_on_kart.py

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -442,52 +442,6 @@ class DummyTaskAddConfiguration(gokart.TaskOnKart):
442442
mock_cmdline.return_value = luigi.cmdline_parser.CmdlineParser(['DummyTaskAddConfiguration', '--DummyTaskAddConfiguration-aa', '2'])
443443
self.assertEqual(DummyTaskAddConfiguration().aa, 2)
444444

445-
def test_load_list_of_list_pandas(self):
446-
task = _DummyTask()
447-
task.load = Mock(return_value=[pd.DataFrame(dict(a=[1])), [pd.DataFrame(dict(a=[2])), pd.DataFrame(dict(a=[3]))]]) # type: ignore
448-
449-
df = task.load_data_frame()
450-
self.assertIsInstance(df, pd.DataFrame)
451-
self.assertEqual(3, df.shape[0])
452-
453-
def test_load_single_value_dict_of_dataframe(self):
454-
task = _DummyTask()
455-
task.load = Mock(return_value={'a': pd.DataFrame(dict(a=[1]))}) # type: ignore
456-
457-
df = task.load_data_frame()
458-
self.assertIsInstance(df, pd.DataFrame)
459-
self.assertEqual(1, df.shape[0])
460-
461-
def test_load_data_frame_drop_columns(self):
462-
task = _DummyTask()
463-
task.load = Mock(return_value=pd.DataFrame(dict(a=[1], b=[2], c=[3]))) # type: ignore
464-
465-
df = task.load_data_frame(required_columns={'a', 'c'}, drop_columns=True)
466-
self.assertIsInstance(df, pd.DataFrame)
467-
self.assertEqual(1, df.shape[0])
468-
self.assertSetEqual({'a', 'c'}, set(df.columns))
469-
470-
def test_load_data_frame_empty_input(self):
471-
task = _DummyTask()
472-
task.load = Mock(return_value=pd.DataFrame(dict(a=[], b=[], c=[]))) # type: ignore
473-
474-
df = task.load_data_frame(required_columns={'a', 'c'})
475-
self.assertIsInstance(df, pd.DataFrame)
476-
self.assertEqual(0, df.shape[0])
477-
self.assertSetEqual({'a', 'b', 'c'}, set(df.columns))
478-
479-
def test_load_index_only_dataframe(self):
480-
task = _DummyTask()
481-
task.load = Mock(return_value=pd.DataFrame(index=range(3))) # type: ignore
482-
483-
# cannot load index-only frame with required_columns
484-
self.assertRaises(AssertionError, lambda: task.load_data_frame(required_columns={'a', 'c'}))
485-
486-
df: pd.DataFrame = task.load_data_frame()
487-
self.assertIsInstance(df, pd.DataFrame)
488-
self.assertTrue(df.empty)
489-
self.assertListEqual(list(range(3)), list(df.index))
490-
491445
def test_use_rerun_with_inherits(self):
492446
# All tasks are completed.
493447
task_c = _DummyTaskC()

test/testing/test_run_with_empty_data_frame.py

Lines changed: 0 additions & 104 deletions
This file was deleted.

0 commit comments

Comments
 (0)