From 69a1f4d71c8dfe34ba03ff494cb5c4baef651f1b Mon Sep 17 00:00:00 2001 From: Mamoru Miura Date: Thu, 6 Mar 2025 07:15:49 +0900 Subject: [PATCH] feat: raise specific exception with fail_on_empty_dump --- docs/for_pandas.rst | 4 ++-- gokart/task.py | 10 ++++++++-- test/test_task_on_kart.py | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/for_pandas.rst b/docs/for_pandas.rst index ced424e3..09f01463 100644 --- a/docs/for_pandas.rst +++ b/docs/for_pandas.rst @@ -53,7 +53,7 @@ Please refer to :func:`~gokart.task.TaskOnKart.load`. Fail on empty DataFrame ----------------------- -When the :attr:`~gokart.task.TaskOnKart.fail_on_empty_dump` parameter is true, the :func:`~gokart.task.TaskOnKart.dump()` method is `AssertionError` on trying to dump empty ``pandas.DataFrame``. +When the :attr:`~gokart.task.TaskOnKart.fail_on_empty_dump` parameter is true, the :func:`~gokart.task.TaskOnKart.dump()` method raises :class:`~gokart.task.EmptyDumpError` when trying to dump an empty ``pandas.DataFrame``. .. code:: python @@ -70,7 +70,7 @@ When the :attr:`~gokart.task.TaskOnKart.fail_on_empty_dump` parameter is true, t :: $ python main.py EmptyTask --fail-on-empty-dump true - # AssertionError + # EmptyDumpError $ python main.py EmptyTask # Task will be ran and outputs an empty dataframe diff --git a/gokart/task.py b/gokart/task.py index 8e38cc91..04238eae 100644 --- a/gokart/task.py +++ b/gokart/task.py @@ -38,6 +38,11 @@ K = TypeVar('K') +# NOTE: inherits from AssertionError for backward compatibility (formerly, gokart raised AssertionError when a task dumped an empty DataFrame). +class EmptyDumpError(AssertionError): + """Attempted to dump an empty DataFrame even though it is prohibited (fail_on_empty_dump is set to True).""" + + class TaskOnKart(luigi.Task, Generic[T]): """ This is a wrapper class of luigi.Task. 
@@ -359,8 +364,9 @@ def dump(self, obj: Any, target: Union[str, TargetOnKart], custom_labels: dict[A def dump(self, obj: Any, target: Union[None, str, TargetOnKart] = None, custom_labels: dict[str, Any] | None = None) -> None: PandasTypeConfigMap().check(obj, task_namespace=self.task_namespace) - if self.fail_on_empty_dump and isinstance(obj, pd.DataFrame): - assert not obj.empty + if self.fail_on_empty_dump: + if isinstance(obj, pd.DataFrame) and obj.empty: + raise EmptyDumpError() self._get_output_target(target).dump( obj, diff --git a/test/test_task_on_kart.py b/test/test_task_on_kart.py index 9fd709f6..8d590ead 100644 --- a/test/test_task_on_kart.py +++ b/test/test_task_on_kart.py @@ -14,6 +14,7 @@ from gokart.file_processor import XmlFileProcessor from gokart.parameter import ListTaskInstanceParameter, TaskInstanceParameter from gokart.target import ModelTarget, SingleFileTarget, TargetOnKart +from gokart.task import EmptyDumpError class _DummyTask(gokart.TaskOnKart): @@ -414,7 +415,7 @@ def test_fail_on_empty_dump(self): # fail task = _DummyTask(fail_on_empty_dump=True) - self.assertRaises(AssertionError, lambda: task.dump(pd.DataFrame())) + self.assertRaises(EmptyDumpError, lambda: task.dump(pd.DataFrame())) @patch('luigi.configuration.get_config') def test_add_configuration(self, mock_config: Mock):