
Commit 65336a0

huanliwang-db authored and anishshri-db committed
[SPARK-53810][SS][TESTS] Split large TWS python tests into multiple small tests to speedup the CI
### What changes were proposed in this pull request?

Split large TWS (transformWithState) python tests into multiple small tests to speed up the CI.

### Why are the changes needed?

CI is slow now; splitting the tests helps speed it up and makes debugging easier.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Test passes.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #52531 from huanliwang-db/huanliwang-db/split-test.

Authored-by: huanliwang-db <[email protected]>
Signed-off-by: Anish Shrigondekar <[email protected]>
1 parent: c393419

5 files changed: +131, -59 lines
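The change follows a simple pattern: the shared test logic stays behind as mixins in the original modules, while the slower variants move into thin new modules that only subclass those mixins and get their own entries in dev/sparktestsupport/modules.py, so CI can schedule them as separate test files. A minimal, hypothetical sketch of that pattern (class names and the config key below are illustrative, not the actual Spark test classes):

import unittest


class HeavySuiteMixin:
    """Shared test cases; in the real PR these stay in the original, large test module."""

    @classmethod
    def conf(cls):
        # Default configuration for the suite (illustrative key/value).
        return {"example.config": "v1"}

    def test_reads_config(self):
        self.assertEqual(self.conf()["example.config"], self.expected)


class HeavySuiteTests(HeavySuiteMixin, unittest.TestCase):
    # Stays in the original module.
    expected = "v1"


class HeavySuiteVariantTests(HeavySuiteMixin, unittest.TestCase):
    # Would live in a new, small module with its own entry in
    # dev/sparktestsupport/modules.py so CI runs it as a separate file.
    expected = "v2"

    @classmethod
    def conf(cls):
        cfg = dict(super().conf())
        cfg["example.config"] = "v2"
        return cfg


if __name__ == "__main__":
    unittest.main()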

dev/sparktestsupport/modules.py

Lines changed: 2 additions & 0 deletions
@@ -565,6 +565,7 @@ def __hash__(self):
         "pyspark.sql.tests.pandas.test_pandas_grouped_map_with_state",
         "pyspark.sql.tests.pandas.test_pandas_map",
         "pyspark.sql.tests.pandas.test_pandas_transform_with_state",
+        "pyspark.sql.tests.pandas.test_pandas_transform_with_state_checkpoint_v2",
         "pyspark.sql.tests.pandas.test_pandas_udf",
         "pyspark.sql.tests.pandas.test_pandas_udf_grouped_agg",
         "pyspark.sql.tests.pandas.test_pandas_udf_scalar",
@@ -1125,6 +1126,7 @@ def __hash__(self):
         "pyspark.sql.tests.connect.streaming.test_parity_listener",
         "pyspark.sql.tests.connect.streaming.test_parity_foreach",
         "pyspark.sql.tests.connect.streaming.test_parity_foreach_batch",
+        "pyspark.sql.tests.connect.streaming.test_parity_transform_with_state_pyspark",
         "pyspark.sql.tests.connect.test_resources",
         "pyspark.sql.tests.connect.shell.test_progress",
         "pyspark.sql.tests.connect.test_df_debug",

python/pyspark/sql/tests/connect/pandas/test_parity_pandas_transform_with_state.py

Lines changed: 0 additions & 31 deletions
@@ -18,7 +18,6 @@

 from pyspark.sql.tests.pandas.test_pandas_transform_with_state import (
     TransformWithStateInPandasTestsMixin,
-    TransformWithStateInPySparkTestsMixin,
 )
 from pyspark import SparkConf
 from pyspark.testing.connectutils import ReusedConnectTestCase
@@ -54,36 +53,6 @@ def test_schema_evolution_scenarios(self):
         pass


-class TransformWithStateInPySparkParityTests(
-    TransformWithStateInPySparkTestsMixin, ReusedConnectTestCase
-):
-    """
-    Spark connect parity tests for TransformWithStateInPySpark. Run every test case in
-    `TransformWithStateInPySparkTestsMixin` in spark connect mode.
-    """
-
-    @classmethod
-    def conf(cls):
-        # Due to multiple inheritance from the same level, we need to explicitly setting configs in
-        # both TransformWithStateInPySparkTestsMixin and ReusedConnectTestCase here
-        cfg = SparkConf(loadDefaults=False)
-        for base in cls.__bases__:
-            if hasattr(base, "conf"):
-                parent_cfg = base.conf()
-                for k, v in parent_cfg.getAll():
-                    cfg.set(k, v)
-
-        # Extra removing config for connect suites
-        if cfg._jconf is not None:
-            cfg._jconf.remove("spark.master")
-
-        return cfg
-
-    @unittest.skip("Flaky in spark connect on CI. Skip for now. See SPARK-51368 for details.")
-    def test_schema_evolution_scenarios(self):
-        pass
-
-
 if __name__ == "__main__":
     from pyspark.sql.tests.connect.pandas.test_parity_pandas_transform_with_state import *  # noqa: F401,E501

python/pyspark/sql/tests/connect/streaming/test_parity_transform_with_state_pyspark.py

Lines changed: 65 additions & 0 deletions

@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.sql.tests.pandas.test_pandas_transform_with_state import (
+    TransformWithStateInPySparkTestsMixin,
+)
+from pyspark import SparkConf
+from pyspark.testing.connectutils import ReusedConnectTestCase
+
+
+class TransformWithStateInPySparkParityTests(
+    TransformWithStateInPySparkTestsMixin, ReusedConnectTestCase
+):
+    """
+    Spark connect parity tests for TransformWithStateInPySpark. Run every test case in
+    `TransformWithStateInPySparkTestsMixin` in spark connect mode.
+    """
+
+    @classmethod
+    def conf(cls):
+        # Due to multiple inheritance from the same level, we need to explicitly setting configs in
+        # both TransformWithStateInPySparkTestsMixin and ReusedConnectTestCase here
+        cfg = SparkConf(loadDefaults=False)
+        for base in cls.__bases__:
+            if hasattr(base, "conf"):
+                parent_cfg = base.conf()
+                for k, v in parent_cfg.getAll():
+                    cfg.set(k, v)
+
+        # Extra removing config for connect suites
+        if cfg._jconf is not None:
+            cfg._jconf.remove("spark.master")
+
+        return cfg
+
+    @unittest.skip("Flaky in spark connect on CI. Skip for now. See SPARK-51368 for details.")
+    def test_schema_evolution_scenarios(self):
+        pass
+
+
+if __name__ == "__main__":
+    from pyspark.sql.tests.connect.streaming.test_parity_transform_with_state_pyspark import *  # noqa: F401,E501
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
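The conf() override above merges configuration from every direct base class because the parity test inherits its test cases from TransformWithStateInPySparkTestsMixin and its Spark Connect harness from ReusedConnectTestCase, and a plain super().conf() would only consult whichever base comes first in the MRO. A stripped-down, hypothetical illustration of that merge (BaseA, BaseB, and Merged are stand-ins, not Spark classes); it runs without starting a SparkContext because SparkConf falls back to a plain Python dict when no JVM is active:

from pyspark import SparkConf


class BaseA:
    @classmethod
    def conf(cls):
        c = SparkConf(loadDefaults=False)
        c.set("spark.a", "1")
        return c


class BaseB:
    @classmethod
    def conf(cls):
        c = SparkConf(loadDefaults=False)
        c.set("spark.b", "2")
        return c


class Merged(BaseA, BaseB):
    @classmethod
    def conf(cls):
        # Collect settings from every direct base, mirroring the pattern in the test above.
        merged = SparkConf(loadDefaults=False)
        for base in cls.__bases__:
            if hasattr(base, "conf"):
                for k, v in base.conf().getAll():
                    merged.set(k, v)
        return merged


# Both keys end up in the merged conf; with plain super() only spark.a would.
print(sorted(Merged.conf().getAll()))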

python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py

Lines changed: 0 additions & 28 deletions
@@ -2011,22 +2011,6 @@ def conf(cls):
         return cfg


-class TransformWithStateInPandasWithCheckpointV2TestsMixin(TransformWithStateInPandasTestsMixin):
-    @classmethod
-    def conf(cls):
-        cfg = super().conf()
-        cfg.set("spark.sql.streaming.stateStore.checkpointFormatVersion", "2")
-        return cfg
-
-
-class TransformWithStateInPySparkWithCheckpointV2TestsMixin(TransformWithStateInPySparkTestsMixin):
-    @classmethod
-    def conf(cls):
-        cfg = super().conf()
-        cfg.set("spark.sql.streaming.stateStore.checkpointFormatVersion", "2")
-        return cfg
-
-
 class TransformWithStateInPandasTests(TransformWithStateInPandasTestsMixin, ReusedSQLTestCase):
     pass

@@ -2035,18 +2019,6 @@ class TransformWithStateInPySparkTests(TransformWithStateInPySparkTestsMixin, ReusedSQLTestCase):
     pass


-class TransformWithStateInPandasWithCheckpointV2Tests(
-    TransformWithStateInPandasWithCheckpointV2TestsMixin, ReusedSQLTestCase
-):
-    pass
-
-
-class TransformWithStateInPySparkWithCheckpointV2Tests(
-    TransformWithStateInPySparkWithCheckpointV2TestsMixin, ReusedSQLTestCase
-):
-    pass
-
-
 if __name__ == "__main__":
     from pyspark.sql.tests.pandas.test_pandas_transform_with_state import *  # noqa: F401

python/pyspark/sql/tests/pandas/test_pandas_transform_with_state_checkpoint_v2.py

Lines changed: 64 additions & 0 deletions

@@ -0,0 +1,64 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.testing.sqlutils import ReusedSQLTestCase
+from pyspark.sql.tests.pandas.test_pandas_transform_with_state import (
+    TransformWithStateInPandasTestsMixin,
+    TransformWithStateInPySparkTestsMixin,
+)
+
+
+class TransformWithStateInPandasWithCheckpointV2TestsMixin(TransformWithStateInPandasTestsMixin):
+    @classmethod
+    def conf(cls):
+        cfg = super().conf()
+        cfg.set("spark.sql.streaming.stateStore.checkpointFormatVersion", "2")
+        return cfg
+
+
+class TransformWithStateInPySparkWithCheckpointV2TestsMixin(TransformWithStateInPySparkTestsMixin):
+    @classmethod
+    def conf(cls):
+        cfg = super().conf()
+        cfg.set("spark.sql.streaming.stateStore.checkpointFormatVersion", "2")
+        return cfg
+
+
+class TransformWithStateInPandasWithCheckpointV2Tests(
+    TransformWithStateInPandasWithCheckpointV2TestsMixin, ReusedSQLTestCase
+):
+    pass
+
+
+class TransformWithStateInPySparkWithCheckpointV2Tests(
+    TransformWithStateInPySparkWithCheckpointV2TestsMixin, ReusedSQLTestCase
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.sql.tests.pandas.test_pandas_transform_with_state_checkpoint_v2 import *  # noqa: F401,E501
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
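This new module reuses all test cases from the existing mixins and only flips spark.sql.streaming.stateStore.checkpointFormatVersion to 2, so the same transformWithState scenarios are exercised against the v2 checkpoint format as a separate CI test file. For reference, a minimal sketch (not from the PR) of setting the same config key on a standalone local session; the app name and local master are illustrative:

from pyspark.sql import SparkSession

# Build a session with state store checkpoint format v2, the same key the
# mixins above set through conf().
spark = (
    SparkSession.builder.appName("tws-checkpoint-v2-example")
    .master("local[1]")
    .config("spark.sql.streaming.stateStore.checkpointFormatVersion", "2")
    .getOrCreate()
)
print(spark.conf.get("spark.sql.streaming.stateStore.checkpointFormatVersion"))
spark.stop()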
