Skip to content

Commit a70d683

Browse files
chelsea-lin and shobsi authored
test: restore original udf in the remote function test, use supported type in the test data (#996)
* test: fix test_df_apply_axis_1_complex by converting numpy value
* undo all changes
* improve numpy value handling in gcf code
* enable the multiindex axis=1 test back
* nit reword comment
* Revert "improve numpy value handling in gcf code"

This reverts commit f549683.

---------

Co-authored-by: Shobhit Singh <[email protected]>
1 parent feacaf4 commit a70d683

File tree

1 file changed

+24
-18
lines changed

1 file changed

+24
-18
lines changed

tests/system/large/test_remote_function.py

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1670,7 +1670,11 @@ def analyze(row):
16701670
(3, 4): ["pq", "rs", "tu"],
16711671
(5.0, "six", 7): [8, 9, 10],
16721672
'raise Exception("hacked!")': [11, 12, 13],
1673-
}
1673+
},
1674+
# Default pandas index has non-numpy type, whereas bigframes is
1675+
# always numpy-based type, so let's use the index compatible
1676+
# with bigframes. See more details in b/369689696.
1677+
index=pandas.Index([0, 1, 2], dtype=pandas.Int64Dtype()),
16741678
),
16751679
id="all-kinds-of-column-names",
16761680
),
@@ -1681,17 +1685,22 @@ def analyze(row):
16811685
"y": [1.5, 3.75, 5],
16821686
"z": ["pq", "rs", "tu"],
16831687
},
1684-
index=pandas.MultiIndex.from_tuples(
1685-
[
1686-
("a", 100),
1687-
("a", 200),
1688-
("b", 300),
1689-
]
1688+
index=pandas.MultiIndex.from_frame(
1689+
pandas.DataFrame(
1690+
{
1691+
"idx0": pandas.Series(
1692+
["a", "a", "b"], dtype=pandas.StringDtype()
1693+
),
1694+
"idx1": pandas.Series(
1695+
[100, 200, 300], dtype=pandas.Int64Dtype()
1696+
),
1697+
}
1698+
)
16901699
),
16911700
),
16921701
id="multiindex",
16931702
marks=pytest.mark.skip(
1694-
reason="TODO(b/368639580) revert this skip after fix"
1703+
reason="TODO: revert this skip after this pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/59908"
16951704
),
16961705
),
16971706
pytest.param(
@@ -1701,6 +1710,10 @@ def analyze(row):
17011710
[20, 3.75, "rs"],
17021711
[30, 8.0, "tu"],
17031712
],
1713+
# Default pandas index has non-numpy type, whereas bigframes is
1714+
# always numpy-based type, so let's use the index compatible
1715+
# with bigframes. See more details in b/369689696.
1716+
index=pandas.Index([0, 1, 2], dtype=pandas.Int64Dtype()),
17041717
columns=pandas.MultiIndex.from_arrays(
17051718
[
17061719
["first", "last_two", "last_two"],
@@ -1729,10 +1742,8 @@ def test_df_apply_axis_1_complex(session, pd_df):
17291742

17301743
def serialize_row(row):
17311744
custom = {
1732-
"name": row.name.item() if hasattr(row.name, "item") else row.name,
1733-
"index": [
1734-
idx.item() if hasattr(idx, "item") else idx for idx in row.index
1735-
],
1745+
"name": row.name,
1746+
"index": [idx for idx in row.index],
17361747
"values": [
17371748
val.item() if hasattr(val, "item") else val for val in row.values
17381749
],
@@ -1756,12 +1767,7 @@ def serialize_row(row):
17561767
bf_result = bf_df.apply(serialize_row_remote, axis=1).to_pandas()
17571768
pd_result = pd_df.apply(serialize_row, axis=1)
17581769

1759-
# bf_result.dtype is 'string[pyarrow]' while pd_result.dtype is 'object'
1760-
# , ignore this mismatch by using check_dtype=False.
1761-
#
1762-
# bf_result.index[0].dtype is 'string[pyarrow]' while
1763-
# pd_result.index[0].dtype is 'object', ignore this mismatch by using
1764-
# check_index_type=False.
1770+
# ignore known dtype difference between pandas and bigframes
17651771
pandas.testing.assert_series_equal(
17661772
pd_result, bf_result, check_dtype=False, check_index_type=False
17671773
)

0 commit comments

Comments
 (0)