1515import operator
1616from dataclasses import dataclass
1717from functools import partial
18- from typing import Callable
18+ from typing import Callable , Union
1919
2020import numpy as np
2121import pandas as pd
@@ -47,6 +47,16 @@ class FunctionOptions:
4747)
4848
4949
def sort_dataframe(
    df: Union[pd.DataFrame, pd.Series], index: bool = True, columns: bool = True
) -> Union[pd.DataFrame, pd.Series]:
    """Return ``df`` with its axis labels sorted, leaving the input untouched.

    Used to normalise label order before comparing two pandas objects, so that
    an equality assertion does not fail only because rows or columns come back
    in a different order.

    Parameters
    ----------
    df : pd.DataFrame or pd.Series
        Object whose labels should be sorted.
    index : bool, default True
        Sort by the row labels when true.
    columns : bool, default True
        Sort by the column labels when true. Ignored for a Series, which has
        no column axis.

    Returns
    -------
    pd.DataFrame or pd.Series
        A sorted copy of ``df``. When no sorting is requested (or applicable)
        the input object itself is returned.
    """
    # Use the non-inplace form of sort_index: the caller's object must not be
    # reordered as a side effect of preparing it for comparison.
    if index:
        df = df.sort_index()
    if columns and isinstance(df, pd.DataFrame):
        df = df.sort_index(axis=1)
    return df
58+
59+
5060def to_boolean_if_needed (func_name , value , split_value = 0.5 ):
5161 if func_name in ["__and__" , "__or__" , "__xor__" ]:
5262 return value > split_value
@@ -81,7 +91,7 @@ def test_without_shuffle_execution(setup, func_name, func_opts):
8191 expected = func_opts .func (data1 , data2 )
8292 result = df3 .execute ().fetch ()
8393
84- pd .testing .assert_frame_equal (expected , result )
94+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
8595
8696
8797@pytest .mark .parametrize ("func_name, func_opts" , binary_functions .items ())
@@ -113,7 +123,7 @@ def test_with_one_shuffle_execution(setup, func_name, func_opts):
113123 expected = func_opts .func (data1 , data2 )
114124 result = df3 .execute ().fetch ()
115125
116- pd .testing .assert_frame_equal (expected , result )
126+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
117127
118128 # only 1 axis is monotonic
119129 # data1 with columns split into [0...4], [5...9],
@@ -138,7 +148,7 @@ def test_with_one_shuffle_execution(setup, func_name, func_opts):
138148 expected = func_opts .func (data1 , data2 )
139149 result = df3 .execute ().fetch ()
140150
141- pd .testing .assert_frame_equal (expected , result )
151+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
142152
143153
144154@pytest .mark .parametrize ("func_name, func_opts" , binary_functions .items ())
@@ -168,7 +178,7 @@ def test_with_all_shuffle_execution(setup, func_name, func_opts):
168178 expected = func_opts .func (data1 , data2 )
169179 result = df3 .execute ().fetch ()
170180
171- pd .testing .assert_frame_equal (expected , result )
181+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
172182
173183
174184@pytest .mark .parametrize ("func_name, func_opts" , binary_functions .items ())
@@ -257,7 +267,7 @@ def test_without_shuffle_and_with_one_chunk(setup, func_name, func_opts):
257267 expected = func_opts .func (data1 , data2 )
258268 result = df3 .execute ().fetch ()
259269
260- pd .testing .assert_frame_equal (expected , result )
270+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
261271
262272 # only 1 axis is monotonic
263273 # data1 with columns split into [0...4], [5...9],
@@ -282,7 +292,7 @@ def test_without_shuffle_and_with_one_chunk(setup, func_name, func_opts):
282292 expected = func_opts .func (data1 , data2 )
283293 result = df3 .execute ().fetch ()
284294
285- pd .testing .assert_frame_equal (expected , result )
295+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
286296
287297
288298@pytest .mark .parametrize ("func_name, func_opts" , binary_functions .items ())
@@ -312,7 +322,7 @@ def test_with_shuffle_and_with_one_chunk(setup, func_name, func_opts):
312322 expected = func_opts .func (data1 , data2 )
313323 result = df3 .execute ().fetch ()
314324
315- pd .testing .assert_frame_equal (expected , result )
325+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
316326
317327 # only 1 axis is monotonic
318328 # data1 with columns split into [0...4], [5...9],
@@ -337,7 +347,7 @@ def test_with_shuffle_and_with_one_chunk(setup, func_name, func_opts):
337347 expected = func_opts .func (data1 , data2 )
338348 result = df3 .execute ().fetch ()
339349
340- pd .testing .assert_frame_equal (expected , result )
350+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
341351
342352
343353@pytest .mark .parametrize ("func_name, func_opts" , binary_functions .items ())
@@ -497,7 +507,7 @@ def test_with_shuffle_on_string_index(setup, func_name, func_opts):
497507 expected = func_opts .func (data1 , data2 )
498508 result = df3 .execute ().fetch ()
499509
500- pd .testing .assert_frame_equal (expected , result )
510+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
501511
502512
503513@pytest .mark .parametrize ("func_name, func_opts" , binary_functions .items ())
@@ -527,23 +537,23 @@ def test_dataframe_and_series(setup, func_name, func_opts):
527537
528538 expected = getattr (data1 [[1 ]], func_opts .func_name )(data2 [1 ], axis = "index" )
529539 result = r1 .execute ().fetch ()
530- pd .testing .assert_frame_equal (expected , result )
540+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
531541
532542 # operate on dataframe and series without shuffle
533543 df2 = from_pandas (data1 , chunk_size = (5 , 5 ))
534544 r2 = getattr (df2 , func_opts .func_name )(s1 , axis = "index" )
535545
536546 expected = getattr (data1 , func_opts .func_name )(data2 [1 ], axis = "index" )
537547 result = r2 .execute ().fetch ()
538- pd .testing .assert_frame_equal (expected , result )
548+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
539549
540550 # operate on dataframe and series with shuffle
541551 df3 = from_pandas (data1 , chunk_size = (5 , 5 ))
542552 r3 = getattr (df3 , func_opts .func_name )(s1 , axis = "columns" )
543553
544554 expected = getattr (data1 , func_opts .func_name )(data2 [1 ], axis = "columns" )
545555 result = r3 .execute ().fetch ()
546- pd .testing .assert_frame_equal (expected , result )
556+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
547557
548558 # test both one chunk, axis=0
549559 pdf = pd .DataFrame ({"ca" : [1 , 3 , 2 ], "cb" : [360 , 180 , 2 ]}, index = [1 , 2 , 3 ])
@@ -553,7 +563,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
553563 mars_series = from_pandas_series (series )
554564 result = getattr (df , func_opts .func_name )(mars_series , axis = 0 ).execute ().fetch ()
555565 expected = getattr (pdf , func_opts .func_name )(series , axis = 0 )
556- pd .testing .assert_frame_equal (expected , result )
566+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
557567
558568 # test different number of chunks, axis=0
559569 pdf = pd .DataFrame ({"ca" : [1 , 3 , 2 ], "cb" : [360 , 180 , 2 ]}, index = [1 , 2 , 3 ])
@@ -563,7 +573,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
563573 mars_series = from_pandas_series (series )
564574 result = getattr (df , func_opts .func_name )(mars_series , axis = 0 ).execute ().fetch ()
565575 expected = getattr (pdf , func_opts .func_name )(series , axis = 0 )
566- pd .testing .assert_frame_equal (expected , result )
576+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
567577
568578 # test with row shuffle, axis=0
569579 pdf = pd .DataFrame ({"ca" : [1 , 3 , 2 ], "cb" : [360 , 180 , 2 ]}, index = [2 , 1 , 3 ])
@@ -575,7 +585,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
575585 expected = getattr (pdf , func_opts .func_name )(series , axis = 0 ).reindex ([3 , 1 , 2 ])
576586 # modify the order of rows
577587 result = result .reindex (index = [3 , 1 , 2 ])
578- pd .testing .assert_frame_equal (expected , result )
588+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
579589
580590 # test both one chunk, axis=1
581591 pdf = pd .DataFrame (
@@ -587,7 +597,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
587597 mars_series = from_pandas_series (series )
588598 result = getattr (df , func_opts .func_name )(mars_series , axis = 1 ).execute ().fetch ()
589599 expected = getattr (pdf , func_opts .func_name )(series , axis = 1 )
590- pd .testing .assert_frame_equal (expected , result )
600+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
591601
592602 # test different number of chunks, axis=1
593603 pdf = pd .DataFrame (
@@ -599,7 +609,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
599609 mars_series = from_pandas_series (series )
600610 result = getattr (df , func_opts .func_name )(mars_series , axis = 1 ).execute ().fetch ()
601611 expected = getattr (pdf , func_opts .func_name )(series , axis = 1 )
602- pd .testing .assert_frame_equal (expected , result )
612+ pd .testing .assert_frame_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
603613
604614 # test with row shuffle, axis=1
605615 pdf = pd .DataFrame (
@@ -665,7 +675,7 @@ def test_series(setup, func_name, func_opts):
665675 )
666676 result = r .execute ().fetch ()
667677 expected = func_opts .func (s1 , s2 )
668- pd .testing .assert_series_equal (expected , result )
678+ pd .testing .assert_series_equal (sort_dataframe ( expected ), sort_dataframe ( result ) )
669679
670680 if func_opts .func_name in ["__and__" , "__or__" , "__xor__" ]:
671681 # bitwise logical operators don't support floating point scalars
0 commit comments