1+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+ # SPDX-License-Identifier: Apache-2.0
3+
4+ import pandas as pd
5+ import pytest
6+
7+ from data_designer .config .columns import CustomColumnConfig
8+ from data_designer .engine .column_generators .generators .custom import CustomColumnGenerator
9+ from data_designer .engine .errors import DataDesignerRuntimeError
10+
11+
def test_generate_successful_custom_column(stub_resource_provider: object) -> None:
    """Verify that a generator function adding only the configured column succeeds.

    The generator function writes a single new column, named like the config,
    holding the row-wise sum of the two input columns.
    """

    def add_sum_column(data: pd.DataFrame) -> pd.DataFrame:
        # Mutates the frame it receives and hands the same object back.
        total = data["col1"] + data["other_col"]
        data["sum_column"] = total
        return data

    input_frame = pd.DataFrame({"col1": [1, 2, 3, 4], "other_col": [10, 20, 30, 40]})

    column_generator = CustomColumnGenerator(
        config=CustomColumnConfig(name="sum_column", generator_function=add_sum_column),
        resource_provider=stub_resource_provider,
    )
    output = column_generator.generate(input_frame)

    assert "sum_column" in output.columns
    assert output["sum_column"].tolist() == [11, 22, 33, 44]
    # Row count must be unchanged by the generation step.
    assert len(output) == 4
28+
29+
def test_generate_custom_column_with_string_data(stub_resource_provider: object) -> None:
    """Verify custom column generation when the new column is built from strings.

    The generator function joins the first and last name columns with a
    single space and stores the result under the configured column name.
    """

    def add_full_name_column(data: pd.DataFrame) -> pd.DataFrame:
        # Element-wise string concatenation: "<first> <last>".
        joined = data["first_name"] + " " + data["last_name"]
        data["full_name"] = joined
        return data

    people = pd.DataFrame(
        {
            "first_name": ["John", "Jane", "Bob"],
            "last_name": ["Doe", "Smith", "Johnson"],
        }
    )

    column_generator = CustomColumnGenerator(
        config=CustomColumnConfig(name="full_name", generator_function=add_full_name_column),
        resource_provider=stub_resource_provider,
    )
    output = column_generator.generate(people)

    assert "full_name" in output.columns
    assert output["full_name"].tolist() == ["John Doe", "Jane Smith", "Bob Johnson"]
45+
46+
def test_generate_error_when_unexpected_columns_added(stub_resource_provider: object) -> None:
    """Verify that adding a column beyond the configured one is reported as an error.

    The generator function adds both the configured column and an extra one;
    the test expects a ``DataDesignerRuntimeError`` that names the generator
    function and the extra column.
    """

    def add_multiple_columns(data: pd.DataFrame) -> pd.DataFrame:
        data["expected_column"] = data["col1"] * 2
        # Deliberately add a second, unconfigured column to trigger the error.
        data["unexpected_column"] = data["col1"] * 3
        return data

    expected_message = (
        r"Custom column generator add_multiple_columns added unexpected columns: unexpected_column"
    )
    input_frame = pd.DataFrame({"col1": [1, 2, 3]})

    column_generator = CustomColumnGenerator(
        config=CustomColumnConfig(name="expected_column", generator_function=add_multiple_columns),
        resource_provider=stub_resource_provider,
    )

    with pytest.raises(DataDesignerRuntimeError, match=expected_message):
        column_generator.generate(input_frame)
65+
66+
def test_generate_error_when_no_column_added(stub_resource_provider: object) -> None:
    """Verify that a generator function which adds nothing is reported as an error.

    The generator function returns its input untouched, so the configured
    column never appears; the test expects a ``DataDesignerRuntimeError``
    that names the generator function.
    """

    def add_no_columns(data: pd.DataFrame) -> pd.DataFrame:
        # Identity: hand the frame back without creating the configured column.
        return data

    expected_message = r"Custom column generator add_no_columns added no columns"
    input_frame = pd.DataFrame({"col1": [1, 2, 3]})

    column_generator = CustomColumnGenerator(
        config=CustomColumnConfig(name="missing_column", generator_function=add_no_columns),
        resource_provider=stub_resource_provider,
    )

    with pytest.raises(DataDesignerRuntimeError, match=expected_message):
        column_generator.generate(input_frame)
83+
84+
def test_generate_error_when_generator_function_raises_exception(stub_resource_provider: object) -> None:
    """Verify that a failure inside the generator function surfaces as a runtime error.

    The generator function raises ``ValueError``; the test expects
    ``generate`` to raise ``DataDesignerRuntimeError`` whose message mentions
    the column name and the original error text.
    """

    def failing_generator(data: pd.DataFrame) -> pd.DataFrame:
        # Always fails; never returns a frame.
        raise ValueError("Something went wrong in the generator")

    expected_message = r"Error generating custom column 'test_column': Something went wrong"
    input_frame = pd.DataFrame({"col1": [1, 2, 3]})

    column_generator = CustomColumnGenerator(
        config=CustomColumnConfig(name="test_column", generator_function=failing_generator),
        resource_provider=stub_resource_provider,
    )

    with pytest.raises(DataDesignerRuntimeError, match=expected_message):
        column_generator.generate(input_frame)
0 commit comments