@@ -2670,9 +2670,39 @@ def test_join_deduplicate_select():
26702670 assert multi_result .column (1 ).to_pylist () == expected_data ["name" ]
26712671 assert multi_result .column (2 ).to_pylist () == expected_data ["city" ]
26722672
2673+ # Test that schema only contains expected column names (no internal aliases)
2674+ joined_schema = joined_df .schema ()
2675+ column_names = [field .name for field in joined_schema ]
2676+ expected_columns = ["id" , "name" , "city" ]
2677+ assert column_names == expected_columns
2678+
2679+ # Ensure no internal alias names like "__right_id" appear in the schema
2680+ for col_name in column_names :
2681+ assert not col_name .startswith ("__" ), f"Internal alias '{ col_name } ' leaked into schema"
2682+
2683+ # Test selecting each column individually to ensure they all work
2684+ for col_name in expected_columns :
2685+ individual_select = joined_df .select (column (col_name ))
2686+ result = individual_select .collect ()[0 ]
2687+ assert len (result ) == 2 , f"Column '{ col_name } ' selection failed"
2688+ assert result .schema .field (0 ).name == col_name
2689+
2690+ # Test that we can select all columns using their names
2691+ all_columns_select = joined_df .select (* [column (name ) for name in expected_columns ])
2692+ all_result = all_columns_select .collect ()[0 ]
2693+ assert all_result .schema .names == expected_columns
2694+
2695+ # Verify that attempting to select a potential internal alias fails appropriately
2696+ with pytest .raises (Exception ): # Should raise an error for non-existent column
2697+ joined_df .select (column ("__right_id" )).collect ()
2698+
26732699
26742700def test_join_deduplicate_all_types ():
2675- """Test deduplication behavior across different join types (left, right, outer)."""
2701+ """Test deduplication behavior across different join types (left, right, outer).
2702+
2703+ Note: This test may show linting errors due to method signature overloads,
2704+ but the functionality should work correctly at runtime.
2705+ """
26762706 ctx = SessionContext ()
26772707
26782708 # Create left dataframe with some rows that won't match
0 commit comments