Skip to content

Commit 59f4ccd

Browse files
committed
checks, unit tests
1 parent 4212986 commit 59f4ccd

File tree

3 files changed

+115
-0
lines changed

3 files changed

+115
-0
lines changed

src/data_designer/engine/column_generators/generators/custom.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,23 @@ def generate(self, data: pd.DataFrame) -> pd.DataFrame:
3131
logger.info(f"🛠️ Generating custom column {self.config.name!r} with {len(data)} records")
3232
logger.info(f" |-- generator function: {self.config.generator_function.__name__}")
3333

34+
original_columns = set(data.columns)
3435
try:
3536
result = self.config.generator_function(data)
37+
38+
# Check if custom column is the only one that was added
39+
diff_columns = set(result.columns) - original_columns
40+
if len(diff_columns) == 0:
41+
raise DataDesignerRuntimeError(
42+
f"Custom column generator {self.config.generator_function.__name__} added no columns. "
43+
f"Expected column {self.config.name!r} to be added by this generator."
44+
)
45+
elif diff_columns != {self.config.name}:
46+
diff_columns_str = ", ".join(diff_columns - {self.config.name})
47+
raise DataDesignerRuntimeError(
48+
f"Custom column generator {self.config.generator_function.__name__} added unexpected columns: {diff_columns_str}. "
49+
f"Expected only column {self.config.name!r} to be added by this generator."
50+
)
3651
except Exception as e:
3752
raise DataDesignerRuntimeError(f"Error generating custom column {self.config.name!r}: {e}")
3853

tests/config/test_columns.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def test_data_designer_column_type_get_display_order():
3838
DataDesignerColumnType.LLM_JUDGE,
3939
DataDesignerColumnType.VALIDATION,
4040
DataDesignerColumnType.EXPRESSION,
41+
DataDesignerColumnType.CUSTOM,
4142
]
4243

4344

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import pandas as pd
5+
import pytest
6+
7+
from data_designer.config.columns import CustomColumnConfig
8+
from data_designer.engine.column_generators.generators.custom import CustomColumnGenerator
9+
from data_designer.engine.errors import DataDesignerRuntimeError
10+
11+
12+
def test_generate_successful_custom_column(stub_resource_provider: object) -> None:
    """Check that a generator adding exactly the configured column succeeds.

    The generator function writes a numeric ``sum_column`` derived from the two
    input columns; the resulting frame must contain that column with the
    element-wise sums and keep the original row count.

    Args:
        stub_resource_provider: Resource provider stub injected by pytest
            (the fixture is defined outside this view).
    """

    def add_sum_column(data: pd.DataFrame) -> pd.DataFrame:
        # Writes the new column onto the incoming frame and returns that same frame.
        data["sum_column"] = data["col1"] + data["other_col"]
        return data

    column_config = CustomColumnConfig(name="sum_column", generator_function=add_sum_column)
    column_generator = CustomColumnGenerator(
        config=column_config,
        resource_provider=stub_resource_provider,
    )

    input_frame = pd.DataFrame({"col1": [1, 2, 3, 4], "other_col": [10, 20, 30, 40]})
    output_frame = column_generator.generate(input_frame)

    assert "sum_column" in output_frame.columns
    assert output_frame["sum_column"].tolist() == [11, 22, 33, 44]
    assert len(output_frame) == 4
28+
29+
30+
def test_generate_custom_column_with_string_data(stub_resource_provider: object) -> None:
    """Check custom column generation when the new column is built from strings.

    The generator function joins ``first_name`` and ``last_name`` with a single
    space into ``full_name``; the output must contain the joined values.

    Args:
        stub_resource_provider: Resource provider stub injected by pytest
            (the fixture is defined outside this view).
    """

    def add_full_name_column(data: pd.DataFrame) -> pd.DataFrame:
        # Element-wise string concatenation of the two name columns.
        data["full_name"] = data["first_name"] + " " + data["last_name"]
        return data

    column_config = CustomColumnConfig(name="full_name", generator_function=add_full_name_column)
    column_generator = CustomColumnGenerator(
        config=column_config,
        resource_provider=stub_resource_provider,
    )

    input_frame = pd.DataFrame(
        {
            "first_name": ["John", "Jane", "Bob"],
            "last_name": ["Doe", "Smith", "Johnson"],
        }
    )
    output_frame = column_generator.generate(input_frame)

    assert "full_name" in output_frame.columns
    assert output_frame["full_name"].tolist() == ["John Doe", "Jane Smith", "Bob Johnson"]
45+
46+
47+
def test_generate_error_when_unexpected_columns_added(stub_resource_provider: object) -> None:
    """Check that adding a column besides the configured one is rejected.

    The generator function adds both ``expected_column`` (the configured name)
    and ``unexpected_column``; ``generate`` must raise a
    ``DataDesignerRuntimeError`` whose message names the extra column.

    Args:
        stub_resource_provider: Resource provider stub injected by pytest
            (the fixture is defined outside this view).
    """

    def add_multiple_columns(data: pd.DataFrame) -> pd.DataFrame:
        data["expected_column"] = data["col1"] * 2
        # The second, unconfigured column is what should trigger the error.
        data["unexpected_column"] = data["col1"] * 3
        return data

    column_config = CustomColumnConfig(name="expected_column", generator_function=add_multiple_columns)
    column_generator = CustomColumnGenerator(
        config=column_config,
        resource_provider=stub_resource_provider,
    )

    input_frame = pd.DataFrame({"col1": [1, 2, 3]})

    # The function's __name__ appears in the message, so the inner name must stay as is.
    expected_message = r"Custom column generator add_multiple_columns added unexpected columns: unexpected_column"
    with pytest.raises(DataDesignerRuntimeError, match=expected_message):
        column_generator.generate(input_frame)
65+
66+
67+
def test_generate_error_when_no_column_added(stub_resource_provider: object) -> None:
    """Check that a generator which adds no column at all is rejected.

    The generator function returns its input untouched, so the configured
    ``missing_column`` never appears; ``generate`` must raise a
    ``DataDesignerRuntimeError`` reporting that no columns were added.

    Args:
        stub_resource_provider: Resource provider stub injected by pytest
            (the fixture is defined outside this view).
    """

    def add_no_columns(data: pd.DataFrame) -> pd.DataFrame:
        # Deliberate no-op: the frame goes back exactly as it came in.
        return data

    column_config = CustomColumnConfig(name="missing_column", generator_function=add_no_columns)
    column_generator = CustomColumnGenerator(
        config=column_config,
        resource_provider=stub_resource_provider,
    )

    input_frame = pd.DataFrame({"col1": [1, 2, 3]})

    # The function's __name__ appears in the message, so the inner name must stay as is.
    expected_message = r"Custom column generator add_no_columns added no columns"
    with pytest.raises(DataDesignerRuntimeError, match=expected_message):
        column_generator.generate(input_frame)
83+
84+
85+
def test_generate_error_when_generator_function_raises_exception(stub_resource_provider: object) -> None:
    """Check that an exception raised by the generator function is wrapped.

    The generator function raises ``ValueError``; ``generate`` must surface it
    as a ``DataDesignerRuntimeError`` whose message carries the column name and
    the original error text.

    Args:
        stub_resource_provider: Resource provider stub injected by pytest
            (the fixture is defined outside this view).
    """

    def failing_generator(data: pd.DataFrame) -> pd.DataFrame:
        raise ValueError("Something went wrong in the generator")

    column_config = CustomColumnConfig(name="test_column", generator_function=failing_generator)
    column_generator = CustomColumnGenerator(
        config=column_config,
        resource_provider=stub_resource_provider,
    )

    input_frame = pd.DataFrame({"col1": [1, 2, 3]})

    expected_message = r"Error generating custom column 'test_column': Something went wrong"
    with pytest.raises(DataDesignerRuntimeError, match=expected_message):
        column_generator.generate(input_frame)

0 commit comments

Comments
 (0)