Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/for_pandas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Please refer to :func:`~gokart.task.TaskOnKart.load`.
Fail on empty DataFrame
-----------------------

When the :attr:`~gokart.task.TaskOnKart.fail_on_empty_dump` parameter is true, the :func:`~gokart.task.TaskOnKart.dump()` method is `AssertionError` on trying to dump empty ``pandas.DataFrame``.
When the :attr:`~gokart.task.TaskOnKart.fail_on_empty_dump` parameter is true, the :func:`~gokart.task.TaskOnKart.dump()` method raises :class:`~gokart.task.EmptyDumpError` when trying to dump an empty ``pandas.DataFrame``.


.. code:: python
Expand All @@ -70,7 +70,7 @@ When the :attr:`~gokart.task.TaskOnKart.fail_on_empty_dump` parameter is true, t
::

$ python main.py EmptyTask --fail-on-empty-dump true
# AssertionError
# EmptyDumpError
$ python main.py EmptyTask
# Task will run and output an empty DataFrame

Expand Down
10 changes: 8 additions & 2 deletions gokart/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
K = TypeVar('K')


# NOTE: inherits from AssertionError for backward compatibility (formerly, Gokart raised that exception when a task dumped an empty DataFrame).
class EmptyDumpError(AssertionError):
    """Raised on an attempt to dump an empty DataFrame even though it is prohibited (fail_on_empty_dump is set to True)."""


class TaskOnKart(luigi.Task, Generic[T]):
"""
This is a wrapper class of luigi.Task.
Expand Down Expand Up @@ -359,8 +364,9 @@ def dump(self, obj: Any, target: Union[str, TargetOnKart], custom_labels: dict[A

def dump(self, obj: Any, target: Union[None, str, TargetOnKart] = None, custom_labels: dict[str, Any] | None = None) -> None:
PandasTypeConfigMap().check(obj, task_namespace=self.task_namespace)
if self.fail_on_empty_dump and isinstance(obj, pd.DataFrame):
assert not obj.empty
if self.fail_on_empty_dump:
if isinstance(obj, pd.DataFrame) and obj.empty:
Comment thread
hirosassa marked this conversation as resolved.
raise EmptyDumpError()

self._get_output_target(target).dump(
obj,
Expand Down
3 changes: 2 additions & 1 deletion test/test_task_on_kart.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from gokart.file_processor import XmlFileProcessor
from gokart.parameter import ListTaskInstanceParameter, TaskInstanceParameter
from gokart.target import ModelTarget, SingleFileTarget, TargetOnKart
from gokart.task import EmptyDumpError


class _DummyTask(gokart.TaskOnKart):
Expand Down Expand Up @@ -414,7 +415,7 @@ def test_fail_on_empty_dump(self):

# fail
task = _DummyTask(fail_on_empty_dump=True)
self.assertRaises(AssertionError, lambda: task.dump(pd.DataFrame()))
self.assertRaises(EmptyDumpError, lambda: task.dump(pd.DataFrame()))

@patch('luigi.configuration.get_config')
def test_add_configuration(self, mock_config: Mock):
Expand Down