Skip to content

Commit 692a1ca

Browse files
committed
test: add e2e test for list-valued options order preservation
Runs list-valued options through mloda.run_all() pipeline to verify that the list-to-tuple conversion preserves element order. Two tests use different column orderings with weighted sums to prove order matters and is correctly maintained.
1 parent 5301462 commit 692a1ca

File tree

1 file changed

+207
-0
lines changed

1 file changed

+207
-0
lines changed
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
"""End-to-end test for list-valued options in PROPERTY_MAPPING (issue #228).
2+
3+
Verifies that list-valued options pass through the mloda pipeline without
4+
TypeError and that element order is preserved via tuple conversion.
5+
"""
6+
7+
from typing import Any, Dict, Optional, Set, Type, Union
8+
9+
from mloda.provider import ComputeFramework
10+
from mloda.provider import FeatureGroup
11+
from mloda.provider import FeatureSet
12+
from mloda.provider import DataCreator
13+
from mloda.provider import BaseInputData
14+
from mloda.user import Feature
15+
from mloda.user import FeatureName
16+
from mloda.user import Options
17+
from mloda.user import PluginCollector
18+
from mloda.user import mloda
19+
from mloda.core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
20+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
21+
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
22+
23+
import pandas as pd
24+
25+
26+
class ListValuedTestDataCreator(FeatureGroup):
    """Root data source for the list-valued option tests.

    Supplies a fixed three-row pandas frame with the columns ``col_a``,
    ``col_b`` and ``col_c``.
    """

    @classmethod
    def input_data(cls) -> Optional[BaseInputData]:
        """Declare the three generated columns through a ``DataCreator``."""
        provided_columns = {"col_a", "col_b", "col_c"}
        return DataCreator(provided_columns)

    @classmethod
    def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
        """Build the fixed frame; ``data`` and ``features`` are not read."""
        rows_by_column = {
            "col_a": [1, 2, 3],
            "col_b": [10, 20, 30],
            "col_c": [100, 200, 300],
        }
        return pd.DataFrame(rows_by_column)

    @classmethod
    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
        """Return the set containing only ``PandasDataFrame``."""
        supported_frameworks = {PandasDataFrame}
        return supported_frameworks
46+
47+
48+
class ListValuedFeatureGroup(FeatureGroup):
    """Feature group that accepts a list-valued 'columns' option.

    Computes an order-dependent weighted sum in which the column at
    position ``i`` is multiplied by ``10 ** i``:
        result = columns[0]*1 + columns[1]*10 + columns[2]*100 + ...

    Every position carries a different weight, so reordering the columns
    changes the result. That is what lets a test detect lost ordering.
    """

    PROPERTY_MAPPING = {
        "columns": {
            "explanation": "List of columns to combine in order",
            DefaultOptionKeys.context: True,
            DefaultOptionKeys.strict_validation: False,
        },
        DefaultOptionKeys.in_features: {
            "explanation": "Source features",
            DefaultOptionKeys.context: True,
        },
    }

    @classmethod
    def match_feature_group_criteria(
        cls,
        feature_name: Union[FeatureName, str],
        options: Options,
        data_access_collection: Optional[Any] = None,
    ) -> bool:
        """Match a feature against this group's ``PROPERTY_MAPPING``.

        The decision is delegated to
        ``FeatureChainParser.match_configuration_feature_chain_parser``;
        ``data_access_collection`` is accepted but not used here.
        """
        _name = feature_name.name if isinstance(feature_name, FeatureName) else feature_name
        return FeatureChainParser.match_configuration_feature_chain_parser(
            _name,
            options,
            property_mapping=cls.PROPERTY_MAPPING,
        )

    def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
        """Return the features from ``options.get_in_features()`` as a set."""
        source_features = options.get_in_features()
        return set(source_features)

    @staticmethod
    def _resolve_columns(columns_raw: Any) -> list:
        """Normalize the raw 'columns' option into a non-empty ordered list.

        Args:
            columns_raw: The option value. A ``str`` is parsed as a Python
                literal (e.g. ``"['col_a', 'col_b']"``); any other value is
                iterated as-is, which covers both lists and tuples.

        Returns:
            The column names as a list, in their original order.

        Raises:
            ValueError: If the option is missing or yields no columns.
                ``ast.literal_eval`` also raises ``ValueError`` or
                ``SyntaxError`` for a string that is not a valid literal.
        """
        # Local import so this block does not depend on a file-level import.
        import ast

        if columns_raw is None:
            raise ValueError("Option 'columns' is required but was not provided")

        if isinstance(columns_raw, str):
            # literal_eval only accepts literals, so an option string can
            # never execute arbitrary code the way eval() could.
            columns_raw = ast.literal_eval(columns_raw)

        columns = list(columns_raw)
        if not columns:
            raise ValueError("Option 'columns' must contain at least one column")
        return columns

    @classmethod
    def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
        """Add one weighted-sum column per requested feature to ``data``.

        For each feature, the column at position ``i`` of its 'columns'
        option is multiplied by ``10 ** i`` and the terms are summed. The
        result is stored in ``data`` under the feature's name, and the
        mutated ``data`` is returned.
        """
        for feature in features.features:
            columns = cls._resolve_columns(feature.options.get("columns"))

            # Position-dependent weights (1, 10, 100, ...) make the sum
            # sensitive to column order and work for any number of columns.
            weighted_terms = [data[column] * 10**position for position, column in enumerate(columns)]

            result = weighted_terms[0]
            for term in weighted_terms[1:]:
                result = result + term

            data[feature.get_name()] = result
        return data
104+
105+
106+
class TestListValuedOptionsE2E:
    """End-to-end tests for list-valued options through the mloda pipeline.

    The source columns are col_a=[1, 2, 3], col_b=[10, 20, 30] and
    col_c=[100, 200, 300]; the feature group weights the listed columns by
    1, 10 and 100 in list order.
    """

    plugin_collector = PluginCollector.enabled_feature_groups(
        {ListValuedTestDataCreator, ListValuedFeatureGroup}
    )

    def _run_weighted_feature(self, name: str, in_features: Any, columns: Any) -> list:
        """Run one feature through ``mloda.run_all`` and return its values.

        Args:
            name: Feature name, which is also the expected result column.
            in_features: Value for the ``in_features`` context option.
            columns: Value for the list-valued 'columns' context option.

        Returns:
            The values of the ``name`` column as a plain list.

        Raises:
            AssertionError: If the pipeline returns no results or none of
                the returned frames contains the ``name`` column.
        """
        feature = Feature(
            name=name,
            options=Options(
                context={
                    DefaultOptionKeys.in_features: in_features,
                    "columns": columns,
                },
            ),
        )

        result = mloda.run_all(
            [feature],
            compute_frameworks={PandasDataFrame},
            plugin_collector=self.plugin_collector,
        )

        assert len(result) >= 1

        for df in result:
            if name in df.columns:
                return df[name].tolist()

        raise AssertionError(f"{name} not found in results")

    def test_list_valued_option_order_preserved(self) -> None:
        """List-valued option order is preserved through the pipeline.

        Order [col_a, col_b, col_c] with weights [1, 10, 100] gives:
            row 0: 1*1 + 10*10 + 100*100 = 10101
        Order [col_c, col_b, col_a] with weights [1, 10, 100] would give:
            row 0: 100*1 + 10*10 + 1*100 = 300
        """
        abc_values = self._run_weighted_feature(
            "weighted_abc",
            "col_a",
            ["col_a", "col_b", "col_c"],
        )
        # Rows 1 and 2 are 2x and 3x row 0 because every source column scales linearly.
        expected = [10101, 20202, 30303]
        assert abc_values == expected, f"Expected {expected}, got {abc_values}"

    def test_list_valued_option_different_order(self) -> None:
        """Reversed column order produces different results, proving order preservation."""
        cba_values = self._run_weighted_feature(
            "weighted_cba",
            "col_a",
            ["col_c", "col_b", "col_a"],
        )
        # row 0: 100*1 + 10*10 + 1*100 = 300; later rows scale by 2 and 3.
        expected = [300, 600, 900]
        assert cba_values == expected, f"Expected {expected}, got {cba_values}"

    def test_list_valued_in_features(self) -> None:
        """in_features passed as a list works through the pipeline."""
        values = self._run_weighted_feature(
            "weighted_list_in",
            ["col_a", "col_b"],
            ["col_a", "col_b", "col_c"],
        )
        # row 0: 1*1 + 10*10 + 100*100 = 10101; later rows scale by 2 and 3.
        expected = [10101, 20202, 30303]
        assert values == expected, f"Expected {expected}, got {values}"

0 commit comments

Comments
 (0)