@@ -339,7 +339,7 @@ def test_join():
 
     # Verify we don't make a breaking change to pre-43.0.0
     # where users would pass join_keys as a positional argument
-    df2 = df.join(df1, (["a"], ["a"]), how="inner")  # type: ignore
+    df2 = df.join(df1, (["a"], ["a"]), how="inner")
     df2.show()
     df2 = df2.sort(column("l.a"))
     table = pa.Table.from_batches(df2.collect())
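
For context, the tuple form exercised above is the legacy positional `join_keys` spelling; under the current API the same inner join would normally be written with `on`. A minimal sketch of the two equivalent spellings, assuming the post-43.0.0 keyword signature:

    # legacy: join keys as a positional (left_cols, right_cols) tuple
    df2 = df.join(df1, (["a"], ["a"]), how="inner")
    # current: a shared column name via the `on` keyword
    df2 = df.join(df1, on="a", how="inner")
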
@@ -375,17 +375,17 @@ def test_join_invalid_params():
     with pytest.raises(
         ValueError, match=r"`left_on` or `right_on` should not provided with `on`"
     ):
-        df2 = df.join(df1, on="a", how="inner", right_on="test")  # type: ignore
+        df2 = df.join(df1, on="a", how="inner", right_on="test")
 
     with pytest.raises(
         ValueError, match=r"`left_on` and `right_on` should both be provided."
     ):
-        df2 = df.join(df1, left_on="a", how="inner")  # type: ignore
+        df2 = df.join(df1, left_on="a", how="inner")
 
     with pytest.raises(
         ValueError, match=r"either `on` or `left_on` and `right_on` should be provided."
     ):
-        df2 = df.join(df1, how="inner")  # type: ignore
+        df2 = df.join(df1, how="inner")
 
 
 def test_join_on():
@@ -567,7 +567,7 @@ def test_distinct():
 ]
 
 
-@pytest.mark.parametrize("name, expr, result", data_test_window_functions)
+@pytest.mark.parametrize(("name", "expr", "result"), data_test_window_functions)
 def test_window_functions(partitioned_df, name, expr, result):
     df = partitioned_df.select(
         column("a"), column("b"), column("c"), f.alias(expr, name)
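
`pytest.mark.parametrize` accepts the argument names either as one comma-separated string or as a sequence of strings; the tuple form skips the string parsing and keeps each name a separate literal. A toy example, not from this suite:

    import pytest

    @pytest.mark.parametrize(("x", "expected"), [(1, 2), (2, 4)])
    def test_double(x, expected):
        assert x * 2 == expected
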
@@ -730,7 +730,9 @@ def test_optimized_logical_plan(aggregate_df):
 def test_execution_plan(aggregate_df):
     plan = aggregate_df.execution_plan()
 
-    expected = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n"  # noqa: E501
+    expected = (
+        "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n"
+    )
 
     assert expected == plan.display()
 
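
Wrapping the long literal in parentheses keeps the line within the length limit without the `# noqa: E501` suppression, and the same parenthesized form allows implicit concatenation if the string ever needs splitting. A sketch with made-up text:

    expected = (
        "first half of a long expected string, "
        "second half, concatenated at compile time"
    )
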
@@ -754,7 +756,7 @@ def test_execution_plan(aggregate_df):
 
     ctx = SessionContext()
     rows_returned = 0
-    for idx in range(0, plan.partition_count):
+    for idx in range(plan.partition_count):
         stream = ctx.execute(plan, idx)
         try:
             batch = stream.next()
@@ -883,7 +885,7 @@ def test_union_distinct(ctx):
     )
     df_c = ctx.create_dataframe([[batch]]).sort(column("a"))
 
-    df_a_u_b = df_a.union(df_b, True).sort(column("a"))
+    df_a_u_b = df_a.union(df_b, distinct=True).sort(column("a"))
 
     assert df_c.collect() == df_a_u_b.collect()
     assert df_c.collect() == df_a_u_b.collect()
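
Naming the flag at the call site avoids the classic boolean-trap readability problem; the two calls below are equivalent, assuming `union` keeps its `distinct` keyword:

    df_a.union(df_b, True)           # opaque: True could mean anything
    df_a.union(df_b, distinct=True)  # self-documenting
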
@@ -952,8 +954,6 @@ def test_to_arrow_table(df):
 
 def test_execute_stream(df):
     stream = df.execute_stream()
-    for s in stream:
-        print(type(s))
     assert all(batch is not None for batch in stream)
     assert not list(stream)  # after one iteration the generator must be exhausted
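
The two assertions depend on the stream being a single-pass iterator: `all(...)` consumes it, so `list(stream)` must come back empty. The same contract holds for any plain Python iterator:

    it = iter([1, 2, 3])
    assert all(x is not None for x in it)  # consumes every element
    assert not list(it)                    # exhausted: nothing left to yield
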
@@ -967,7 +967,7 @@ def test_execute_stream_to_arrow_table(df, schema):
             (batch.to_pyarrow() for batch in stream), schema=df.schema()
         )
     else:
-        pyarrow_table = pa.Table.from_batches((batch.to_pyarrow() for batch in stream))
+        pyarrow_table = pa.Table.from_batches(batch.to_pyarrow() for batch in stream)
 
     assert isinstance(pyarrow_table, pa.Table)
     assert pyarrow_table.shape == (3, 3)
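
A generator expression that is the sole argument to a call needs no extra parentheses; they are only required when further arguments follow, as in the `schema=` branch above. Illustrated with builtins:

    total = sum(x * x for x in range(4))       # lone argument: no extra parens
    total = sum((x * x for x in range(4)), 0)  # second argument: parens required
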
@@ -1031,7 +1031,7 @@ def test_describe(df):
     }
 
 
-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_write_csv(ctx, df, tmp_path, path_to_str):
     path = str(tmp_path) if path_to_str else tmp_path
 
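
pytest accepts any sequence for the parameter values, so the tuple worked, but a list is the conventional spelling (and the one checked by linters such as flake8-pytest-style's PT007 rule). Toy usage:

    import pytest

    @pytest.mark.parametrize("flag", [True, False])
    def test_flag(flag):
        assert isinstance(flag, bool)
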
@@ -1044,7 +1044,7 @@ def test_write_csv(ctx, df, tmp_path, path_to_str):
     assert result == expected
 
 
-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_write_json(ctx, df, tmp_path, path_to_str):
     path = str(tmp_path) if path_to_str else tmp_path
 
@@ -1057,7 +1057,7 @@ def test_write_json(ctx, df, tmp_path, path_to_str):
     assert result == expected
 
 
-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_write_parquet(df, tmp_path, path_to_str):
     path = str(tmp_path) if path_to_str else tmp_path
 
@@ -1069,7 +1069,7 @@ def test_write_parquet(df, tmp_path, path_to_str):
 
 
 @pytest.mark.parametrize(
-    "compression, compression_level",
+    ("compression", "compression_level"),
     [("gzip", 6), ("brotli", 7), ("zstd", 15)],
 )
 def test_write_compressed_parquet(df, tmp_path, compression, compression_level):
@@ -1080,7 +1080,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level):
     )
 
     # test that the actual compression scheme is the one written
-    for root, dirs, files in os.walk(path):
+    for _root, _dirs, files in os.walk(path):
         for file in files:
             if file.endswith(".parquet"):
                 metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict()
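
The underscore prefix marks `root` and `dirs` as intentionally unused while preserving the three-tuple unpacking that `os.walk` yields; linters such as flake8-bugbear's B007 treat `_`-prefixed loop variables as exempt. A sketch, assuming a `path` directory:

    import os

    for _root, _dirs, files in os.walk(path):  # only the file names matter here
        parquet_files = [f for f in files if f.endswith(".parquet")]
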
@@ -1095,7 +1095,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level):
 
 
 @pytest.mark.parametrize(
-    "compression, compression_level",
+    ("compression", "compression_level"),
     [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)],
 )
 def test_write_compressed_parquet_wrong_compression_level(
@@ -1150,7 +1150,7 @@ def test_dataframe_export(df) -> None:
     table = pa.table(df, schema=desired_schema)
     assert table.num_columns == 1
     assert table.num_rows == 3
-    for i in range(0, 3):
+    for i in range(3):
         assert table[0][i].as_py() is None
 
     # Expect an error when we cannot convert schema
@@ -1184,8 +1184,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame:
     result = df.to_pydict()
 
     assert result["a"] == [1, 2, 3]
-    assert result["string_col"] == ["string data" for _i in range(0, 3)]
-    assert result["new_col"] == [3 for _i in range(0, 3)]
+    assert result["string_col"] == ["string data" for _i in range(3)]
+    assert result["new_col"] == [3 for _i in range(3)]
 
 
 def test_dataframe_repr_html(df) -> None: