Skip to content

Commit ec769fe

Browse files
committed
test: enhance join deduplication tests with schema validation and individual column selection
1 parent 4dd5369 commit ec769fe

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

python/tests/test_dataframe.py

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2670,9 +2670,39 @@ def test_join_deduplicate_select():
26702670
assert multi_result.column(1).to_pylist() == expected_data["name"]
26712671
assert multi_result.column(2).to_pylist() == expected_data["city"]
26722672

2673+
# Test that schema only contains expected column names (no internal aliases)
2674+
joined_schema = joined_df.schema()
2675+
column_names = [field.name for field in joined_schema]
2676+
expected_columns = ["id", "name", "city"]
2677+
assert column_names == expected_columns
2678+
2679+
# Ensure no internal alias names like "__right_id" appear in the schema
2680+
for col_name in column_names:
2681+
assert not col_name.startswith("__"), f"Internal alias '{col_name}' leaked into schema"
2682+
2683+
# Test selecting each column individually to ensure they all work
2684+
for col_name in expected_columns:
2685+
individual_select = joined_df.select(column(col_name))
2686+
result = individual_select.collect()[0]
2687+
assert len(result) == 2, f"Column '{col_name}' selection failed"
2688+
assert result.schema.field(0).name == col_name
2689+
2690+
# Test that we can select all columns using their names
2691+
all_columns_select = joined_df.select(*[column(name) for name in expected_columns])
2692+
all_result = all_columns_select.collect()[0]
2693+
assert all_result.schema.names == expected_columns
2694+
2695+
# Verify that attempting to select a potential internal alias fails appropriately
2696+
with pytest.raises(Exception): # Should raise an error for non-existent column
2697+
joined_df.select(column("__right_id")).collect()
2698+
26732699

26742700
def test_join_deduplicate_all_types():
2675-
"""Test deduplication behavior across different join types (left, right, outer)."""
2701+
"""Test deduplication behavior across different join types (left, right, outer).
2702+
2703+
Note: This test may show linting errors due to method signature overloads,
2704+
but the functionality should work correctly at runtime.
2705+
"""
26762706
ctx = SessionContext()
26772707

26782708
# Create left dataframe with some rows that won't match

0 commit comments

Comments
 (0)