Skip to content

Commit 4750a63

Browse files
sfc-gh-jkewsfc-gh-joshisfc-gh-mvashishtha
authored
SNOW-2304052 - Add the ability to filter out certain tests when running under hybrid mode (#3750)
SNOW-2304052 - Add the ability to filter out certain tests when running under hybrid mode Not all tests pass under hybrid mode, largely because they were built for the snowflake backend specifically. Still, we want to run a significant number of these with hybrid mode enabled to help triage and categorize potential semantic issues. This PR adds the ability to park specific tests (integration only) with a pytest marker, @pytest.mark.skip_hybrid, for skipping. We also have a reference csv of a test run with hybrid which is used for skipping certain tests. This allows us to guard against regressions, provide for incremental development of hybrid, and triage existing differences to determine where effort should be spent. Co-authored-by: Jonathan Shi <[email protected]> Co-authored-by: Mahesh Vashishtha <[email protected]>
1 parent 8dd8630 commit 4750a63

File tree

5 files changed

+20772
-0
lines changed

5 files changed

+20772
-0
lines changed

tests/integ/modin/conftest.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
33
#
44

5+
import os
56
import pathlib
67
import re
78
from datetime import datetime
@@ -47,6 +48,68 @@ def setup_modin_hybrid_mode(pytestconfig):
4748
MODIN_HYBRID_TEST_MODE_ENABLED = False
4849

4950

51+
def read_hybrid_known_failures(results_path=None) -> "pandas.DataFrame":
    """
    Read the hybrid-mode test results CSV and return only the failing rows.

    Args:
        results_path: Optional path to a results CSV. When omitted, the
            `modin_hybrid_integ_results.csv` file that lives next to this
            module is used.

    Returns:
        A dataframe with the columns `module`, `name`, `message` and
        `status`, restricted to rows whose `status` is one of `failed`,
        `xfailed` or `error`. The original row index is preserved.

    You can regenerate the default file by:
    * Collecting the hybrid test results with pytest:
        pytest tests/integ/modin -n 10
               --enable_modin_hybrid_mode
               --csv tests/integ/modin/modin_hybrid_integ_results.csv
    * (Recommended) Pre-filtering the results to reduce the file size:
        import pandas as pd
        df = pd.read_csv("tests/integ/modin/modin_hybrid_integ_results.csv")
        filtered = df[["module", "name", "message", "status"]][
            df["status"].isin(["failed", "xfailed", "error"])
        ]
        # index=False keeps the row index from being written as an extra
        # unnamed column each time the file is rewritten.
        filtered.to_csv(
            "tests/integ/modin/modin_hybrid_integ_results.csv", index=False
        )
    """
    if results_path is None:
        # Resolve relative to this file so the lookup does not depend on the
        # directory pytest was launched from.
        results_path = os.path.normpath(
            os.path.join(
                os.path.dirname(__file__), "../modin/modin_hybrid_integ_results.csv"
            )
        )
    df = pandas.read_csv(results_path)
    failed_mask = df["status"].isin(["failed", "xfailed", "error"])
    # Select rows and columns in a single step instead of chained indexing.
    return df.loc[failed_mask, ["module", "name", "message", "status"]]
77+
78+
79+
# Known hybrid-mode failures, read once when this module is imported so that
# is_hybrid_known_failure() can reuse the same dataframe for every lookup.
HYBRID_KNOWN_FAILURES = read_hybrid_known_failures()
80+
81+
82+
def is_hybrid_known_failure(module_name, test_name) -> "tuple[bool, str | None]":
    """
    Determine whether the module/test is a known hybrid mode failure
    and return the result along with the error message if applicable.

    Args:
        module_name: Value matched against the `module` column of
            HYBRID_KNOWN_FAILURES.
        test_name: Value matched against the `name` column of
            HYBRID_KNOWN_FAILURES.

    Returns:
        A `(failed, msg)` tuple. `failed` is True when at least one row
        matches both values. `msg` is the `message` of the first matching
        row, or None when nothing matches.
        NOTE(review): an empty `message` cell in the CSV presumably comes
        back from pandas as NaN rather than a string -- confirm.
    """
    # Bracket access is used on purpose: attribute-style column access
    # (`df.name`, `df.module`) silently breaks if a column name ever
    # collides with a DataFrame attribute.
    module_mask = HYBRID_KNOWN_FAILURES["module"] == module_name
    testname_mask = HYBRID_KNOWN_FAILURES["name"] == test_name
    test_data = HYBRID_KNOWN_FAILURES[module_mask & testname_mask]
    if test_data.empty:
        return (False, None)
    # Several rows may match; only the first recorded message is reported.
    return (True, test_data["message"].iloc[0])
95+
96+
97+
def pytest_runtest_setup(item):
    """
    Pytest setup hook that skips selected tests during hybrid-mode runs.

    Nothing happens unless the `enable_modin_hybrid_mode` option is set.
    When it is set, a test is skipped if it carries the `skip_hybrid`
    marker, or if is_hybrid_known_failure() reports it as a known failure
    (the recorded message is included in the skip reason).
    """
    hybrid_enabled = item.config.option.enable_modin_hybrid_mode
    if hybrid_enabled:
        # Tests explicitly annotated with @pytest.mark.skip_hybrid opt out.
        skip_markers = list(item.iter_markers(name="skip_hybrid"))
        if skip_markers:
            pytest.skip("Skipped for Hybrid: pytest.mark.skip_hybrid")
        # Otherwise consult the known failure list and surface its message.
        known_failure, reason = is_hybrid_known_failure(
            item.module.__name__, item.name
        )
        if known_failure:
            pytest.skip(f"Skipped for Hybrid: {reason}")
111+
112+
50113
@pytest.fixture(scope="module", autouse=True)
51114
def f(session):
52115
# create a snowpark pandas dataframe so that modin keeps an empty query compiler

0 commit comments

Comments
 (0)