Skip to content

Commit 0ab42a5

Browse files
committed
test: add e2e test for list-valued options order preservation
Runs list-valued options through mloda.run_all() pipeline to verify that the list-to-tuple conversion preserves element order. Two tests use different column orderings with weighted sums to prove order matters and is correctly maintained.
1 parent 1a501d0 commit 0ab42a5

File tree

1 file changed

+208
-0
lines changed

1 file changed

+208
-0
lines changed
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
"""End-to-end test for list-valued options in PROPERTY_MAPPING (issue #228).
2+
3+
Verifies that list-valued options pass through the mloda pipeline without
4+
TypeError and that element order is preserved via tuple conversion.
5+
"""
6+
7+
import ast
8+
from typing import Any, Dict, Optional, Set, Type, Union
9+
10+
from mloda.provider import ComputeFramework
11+
from mloda.provider import FeatureGroup
12+
from mloda.provider import FeatureSet
13+
from mloda.provider import DataCreator
14+
from mloda.provider import BaseInputData
15+
from mloda.user import Feature
16+
from mloda.user import FeatureName
17+
from mloda.user import Options
18+
from mloda.user import PluginCollector
19+
from mloda.user import mloda
20+
from mloda.core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
21+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
22+
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
23+
24+
import pandas as pd
25+
26+
27+
class ListValuedTestDataCreator(FeatureGroup):
    """Feature group that supplies a fixed three-column pandas frame.

    The frame holds ``col_a``, ``col_b`` and ``col_c`` with three rows each,
    using values that differ by a factor of ten per column so that any
    reordering of the columns is visible in a weighted sum.
    """

    @classmethod
    def input_data(cls) -> Optional[BaseInputData]:
        """Declare the three column names this group provides via ``DataCreator``."""
        provided_columns = {"col_a", "col_b", "col_c"}
        return DataCreator(provided_columns)

    @classmethod
    def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
        """Return the static frame; neither ``data`` nor ``features`` is read."""
        frame_contents = {
            "col_a": [1, 2, 3],
            "col_b": [10, 20, 30],
            "col_c": [100, 200, 300],
        }
        return pd.DataFrame(frame_contents)

    @classmethod
    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
        """Report ``PandasDataFrame`` as the only compute framework for this group."""
        supported_frameworks = {PandasDataFrame}
        return supported_frameworks
47+
48+
49+
class ListValuedFeatureGroup(FeatureGroup):
    """Feature group that accepts a list-valued 'columns' option.

    Computes an order-dependent weighted sum:
        result = columns[0]*1 + columns[1]*10 + columns[2]*100

    Between one and three columns are supported; when fewer than three are
    given, only the leading weights are used.
    """

    PROPERTY_MAPPING = {
        "columns": {
            "explanation": "List of columns to combine in order",
            DefaultOptionKeys.context: True,
            DefaultOptionKeys.strict_validation: False,
        },
        DefaultOptionKeys.in_features: {
            "explanation": "Source features",
            DefaultOptionKeys.context: True,
        },
    }

    @classmethod
    def match_feature_group_criteria(
        cls,
        feature_name: Union[FeatureName, str],
        options: Options,
        data_access_collection: Optional[Any] = None,
    ) -> bool:
        """Delegate matching to ``FeatureChainParser`` using ``PROPERTY_MAPPING``.

        Args:
            feature_name: Requested feature, as a ``FeatureName`` or plain string.
            options: Options attached to the requested feature.
            data_access_collection: Accepted for interface compatibility; not used here.

        Returns:
            Whatever ``match_configuration_feature_chain_parser`` reports for
            the name/options pair.
        """
        _name = feature_name.name if isinstance(feature_name, FeatureName) else feature_name
        return FeatureChainParser.match_configuration_feature_chain_parser(
            _name,
            options,
            property_mapping=cls.PROPERTY_MAPPING,
        )

    def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
        """Return the features named by the in_features option as a set.

        ``feature_name`` is not consulted; only ``options.get_in_features()`` is read.
        """
        source_features = options.get_in_features()
        return set(source_features)

    @classmethod
    def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
        """Add one weighted-sum column to ``data`` per requested feature.

        For every feature, the 'columns' option is read, normalised to a
        sequence, and combined as ``sum(data[column_i] * weight_i)`` with the
        weights ``1, 10, 100`` applied in the order the columns were given.

        Args:
            data: Indexable container of columns (a pandas frame in these tests).
            features: Feature set whose members each carry a 'columns' option.

        Returns:
            The same ``data`` object, with a new column named after each feature.

        Raises:
            TypeError: If a feature has no 'columns' option.
            IndexError: If 'columns' is empty or has more entries than weights.
        """
        # Loop-invariant, so defined once instead of per feature.
        weights = (1, 10, 100)

        for feature in features.features:
            columns_raw = feature.options.get("columns")
            if columns_raw is None:
                raise TypeError(f"Feature {feature.get_name()!r} is missing the 'columns' option")

            # A string value is treated as a Python literal and parsed back;
            # any other iterable is copied into a list.
            if isinstance(columns_raw, str):
                columns = ast.literal_eval(columns_raw)
            else:
                columns = list(columns_raw)

            if not 1 <= len(columns) <= len(weights):
                raise IndexError(
                    f"'columns' must contain between 1 and {len(weights)} entries, got {len(columns)}"
                )

            result = data[columns[0]] * weights[0]
            for column, weight in zip(columns[1:], weights[1:]):
                result = result + data[column] * weight

            data[feature.get_name()] = result
        return data
105+
106+
107+
class TestListValuedOptionsE2E:
    """End-to-end tests for list-valued options through the mloda pipeline."""

    plugin_collector = PluginCollector.enabled_feature_groups(
        {ListValuedTestDataCreator, ListValuedFeatureGroup}
    )

    def _first_row_value(self, feature: Feature, column: str) -> Any:
        """Run ``feature`` through ``mloda.run_all`` and return row 0 of ``column``.

        Args:
            feature: The single feature to request.
            column: Name of the result column to look for.

        Returns:
            The first value of ``column`` from the first result frame that contains it.

        Raises:
            AssertionError: If the run yields no results or no frame has ``column``.
        """
        result = mloda.run_all(
            [feature],
            compute_frameworks={PandasDataFrame},
            plugin_collector=self.plugin_collector,
        )

        assert len(result) >= 1

        for df in result:
            if column in df.columns:
                return df[column].tolist()[0]

        raise AssertionError(f"{column} not found in results")

    def test_list_valued_option_order_preserved(self) -> None:
        """List-valued option order is preserved through the pipeline.

        Order [col_a, col_b, col_c] with weights [1, 10, 100] gives:
            row 0: 1*1 + 10*10 + 100*100 = 10101
        Order [col_c, col_b, col_a] with weights [1, 10, 100] gives:
            row 0: 100*1 + 10*10 + 1*100 = 300

        This test checks the first ordering; the reversed ordering is checked
        by ``test_list_valued_option_different_order``.
        """
        feature_abc = Feature(
            name="weighted_abc",
            options=Options(
                context={
                    DefaultOptionKeys.in_features: "col_a",
                    "columns": ["col_a", "col_b", "col_c"],
                },
            ),
        )

        # col_a=1, col_b=10, col_c=100 with weights [1, 10, 100]:
        # abc: 1*1 + 10*10 + 100*100 = 10101
        abc_value = self._first_row_value(feature_abc, "weighted_abc")
        assert abc_value == 10101, f"Expected 10101, got {abc_value}"

    def test_list_valued_option_different_order(self) -> None:
        """Reversed column order produces different results, proving order preservation."""
        feature_cba = Feature(
            name="weighted_cba",
            options=Options(
                context={
                    DefaultOptionKeys.in_features: "col_a",
                    "columns": ["col_c", "col_b", "col_a"],
                },
            ),
        )

        # col_c=100, col_b=10, col_a=1 with weights [1, 10, 100]:
        # cba: 100*1 + 10*10 + 1*100 = 300
        cba_value = self._first_row_value(feature_cba, "weighted_cba")
        assert cba_value == 300, f"Expected 300, got {cba_value}"

    def test_list_valued_in_features(self) -> None:
        """in_features passed as a list works through the pipeline."""
        feature = Feature(
            name="weighted_list_in",
            options=Options(
                context={
                    DefaultOptionKeys.in_features: ["col_a", "col_b"],
                    "columns": ["col_a", "col_b", "col_c"],
                },
            ),
        )

        # col_a=1, col_b=10, col_c=100 with weights [1, 10, 100]:
        # 1*1 + 10*10 + 100*100 = 10101
        value = self._first_row_value(feature, "weighted_list_in")
        assert value == 10101, f"Expected 10101, got {value}"

0 commit comments

Comments
 (0)