@@ -1670,7 +1670,11 @@ def analyze(row):
1670
1670
(3 , 4 ): ["pq" , "rs" , "tu" ],
1671
1671
(5.0 , "six" , 7 ): [8 , 9 , 10 ],
1672
1672
'raise Exception("hacked!")' : [11 , 12 , 13 ],
1673
- }
1673
+ },
1674
+ # The default pandas index has a non-numpy type, whereas bigframes
1675
+ # always uses a numpy-based type, so use an index that is compatible
1676
+ # with bigframes. See b/369689696 for more details.
1677
+ index = pandas .Index ([0 , 1 , 2 ], dtype = pandas .Int64Dtype ()),
1674
1678
),
1675
1679
id = "all-kinds-of-column-names" ,
1676
1680
),
@@ -1681,17 +1685,22 @@ def analyze(row):
1681
1685
"y" : [1.5 , 3.75 , 5 ],
1682
1686
"z" : ["pq" , "rs" , "tu" ],
1683
1687
},
1684
- index = pandas .MultiIndex .from_tuples (
1685
- [
1686
- ("a" , 100 ),
1687
- ("a" , 200 ),
1688
- ("b" , 300 ),
1689
- ]
1688
+ index = pandas .MultiIndex .from_frame (
1689
+ pandas .DataFrame (
1690
+ {
1691
+ "idx0" : pandas .Series (
1692
+ ["a" , "a" , "b" ], dtype = pandas .StringDtype ()
1693
+ ),
1694
+ "idx1" : pandas .Series (
1695
+ [100 , 200 , 300 ], dtype = pandas .Int64Dtype ()
1696
+ ),
1697
+ }
1698
+ )
1690
1699
),
1691
1700
),
1692
1701
id = "multiindex" ,
1693
1702
marks = pytest .mark .skip (
1694
- reason = "TODO(b/368639580) revert this skip after fix "
1703
+ reason = "TODO: revert this skip after this pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/59908 "
1695
1704
),
1696
1705
),
1697
1706
pytest .param (
@@ -1701,6 +1710,10 @@ def analyze(row):
1701
1710
[20 , 3.75 , "rs" ],
1702
1711
[30 , 8.0 , "tu" ],
1703
1712
],
1713
+ # The default pandas index has a non-numpy type, whereas bigframes
1714
+ # always uses a numpy-based type, so use an index that is compatible
1715
+ # with bigframes. See b/369689696 for more details.
1716
+ index = pandas .Index ([0 , 1 , 2 ], dtype = pandas .Int64Dtype ()),
1704
1717
columns = pandas .MultiIndex .from_arrays (
1705
1718
[
1706
1719
["first" , "last_two" , "last_two" ],
@@ -1729,10 +1742,8 @@ def test_df_apply_axis_1_complex(session, pd_df):
1729
1742
1730
1743
def serialize_row (row ):
1731
1744
custom = {
1732
- "name" : row .name .item () if hasattr (row .name , "item" ) else row .name ,
1733
- "index" : [
1734
- idx .item () if hasattr (idx , "item" ) else idx for idx in row .index
1735
- ],
1745
+ "name" : row .name ,
1746
+ "index" : [idx for idx in row .index ],
1736
1747
"values" : [
1737
1748
val .item () if hasattr (val , "item" ) else val for val in row .values
1738
1749
],
@@ -1756,12 +1767,7 @@ def serialize_row(row):
1756
1767
bf_result = bf_df .apply (serialize_row_remote , axis = 1 ).to_pandas ()
1757
1768
pd_result = pd_df .apply (serialize_row , axis = 1 )
1758
1769
1759
- # bf_result.dtype is 'string[pyarrow]' while pd_result.dtype is 'object'
1760
- # , ignore this mismatch by using check_dtype=False.
1761
- #
1762
- # bf_result.index[0].dtype is 'string[pyarrow]' while
1763
- # pd_result.index[0].dtype is 'object', ignore this mismatch by using
1764
- # check_index_type=False.
1770
+ # Ignore the known value-dtype and index-type differences between pandas and bigframes.
1765
1771
pandas .testing .assert_series_equal (
1766
1772
pd_result , bf_result , check_dtype = False , check_index_type = False
1767
1773
)
0 commit comments