Skip to content

Commit 2b07d77

Browse files
dongjoon-hyun authored and HyukjinKwon committed
[SPARK-46076][PYTHON][TESTS] Remove unittest deprecated alias usage for Python 3.12
### What changes were proposed in this pull request? This PR aims to remove `unittest` alias usage for Python 3.12. Currently, it fails like the following. - https://github.com/apache/spark/actions/runs/6971394284/job/18971420822 ``` ====================================================================== ERROR [0.554s]: test_find_spark_home (pyspark.tests.test_util.UtilTests.test_find_spark_home) ---------------------------------------------------------------------- Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/tests/test_util.py", line 83, in test_find_spark_home self.assertEquals(origin, _find_spark_home()) ^^^^^^^^^^^^^^^^^ AttributeError: 'UtilTests' object has no attribute 'assertEquals'. Did you mean: 'assertEqual'? ``` ### Why are the changes needed? Python 3.12 removes the following deprecated aliases. - https://docs.python.org/3/whatsnew/3.12.html#id3 <img width="802" alt="Screenshot 2023-11-23 at 12 52 33 PM" src="https://github.com/apache/spark/assets/9700541/0158c1a4-fcfc-4a02-85c5-7fcbd6c6a034"> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with Python 3.9. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43986 from dongjoon-hyun/SPARK-46076. Authored-by: Dongjoon Hyun <[email protected]> Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent 4670410 commit 2b07d77

17 files changed

+61
-63
lines changed

python/pyspark/ml/tests/test_linalg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ def test_unwrap_udt(self):
362362
Row(v2=unwrapped_vec(1, None, None, [1.0, 2.0, 3.0])),
363363
Row(v2=unwrapped_vec(0, 3, [1, 2], [1.0, 5.5])),
364364
]
365-
self.assertEquals(results, expected)
365+
self.assertEqual(results, expected)
366366

367367

368368
class MatrixUDTTests(MLlibTestCase):

python/pyspark/pandas/tests/indexes/test_base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ def test_index_basic(self):
6262
self.assert_eq(psdf.index.dtype, pdf.index.dtype)
6363

6464
self.assert_eq(ps.Index([])._summary(), "Index: 0 entries")
65-
with self.assertRaisesRegexp(ValueError, "The truth value of a Index is ambiguous."):
65+
with self.assertRaisesRegex(ValueError, "The truth value of a Index is ambiguous."):
6666
bool(ps.Index([1]))
67-
with self.assertRaisesRegexp(TypeError, "Index.name must be a hashable type"):
67+
with self.assertRaisesRegex(TypeError, "Index.name must be a hashable type"):
6868
ps.Index([1, 2, 3], name=[(1, 2, 3)])
69-
with self.assertRaisesRegexp(TypeError, "Index.name must be a hashable type"):
69+
with self.assertRaisesRegex(TypeError, "Index.name must be a hashable type"):
7070
ps.Index([1.0, 2.0, 3.0], name=[(1, 2, 3)])
7171

7272
def test_index_from_series(self):

python/pyspark/pandas/tests/indexes/test_category.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,9 @@ def test_categorical_index(self):
6868
self.assert_eq(psidx.codes, pd.Index(pidx.codes))
6969
self.assert_eq(psidx.ordered, pidx.ordered)
7070

71-
with self.assertRaisesRegexp(TypeError, "Index.name must be a hashable type"):
71+
with self.assertRaisesRegex(TypeError, "Index.name must be a hashable type"):
7272
ps.CategoricalIndex([1, 2, 3], name=[(1, 2, 3)])
73-
with self.assertRaisesRegexp(
73+
with self.assertRaisesRegex(
7474
TypeError, "Cannot perform 'all' with this index type: CategoricalIndex"
7575
):
7676
ps.CategoricalIndex([1, 2, 3]).all()

python/pyspark/pandas/tests/indexes/test_datetime.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ def _disallow_nanoseconds(self, f):
6464
self.assertRaises(ValueError, lambda: f(freq="N"))
6565

6666
def test_datetime_index(self):
67-
with self.assertRaisesRegexp(TypeError, "Index.name must be a hashable type"):
67+
with self.assertRaisesRegex(TypeError, "Index.name must be a hashable type"):
6868
ps.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"], name=[(1, 2)])
69-
with self.assertRaisesRegexp(
69+
with self.assertRaisesRegex(
7070
TypeError, "Cannot perform 'all' with this index type: DatetimeIndex"
7171
):
7272
ps.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"]).all()

python/pyspark/pandas/tests/indexes/test_timedelta.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ def test_timedelta_index(self):
9090
)
9191

9292
# ps.TimedeltaIndex(ps.Index([1, 2, 3]))
93-
with self.assertRaisesRegexp(TypeError, "Index.name must be a hashable type"):
93+
with self.assertRaisesRegex(TypeError, "Index.name must be a hashable type"):
9494
ps.TimedeltaIndex([timedelta(1), timedelta(microseconds=2)], name=[(1, 2)])
95-
with self.assertRaisesRegexp(
95+
with self.assertRaisesRegex(
9696
TypeError, "Cannot perform 'all' with this index type: TimedeltaIndex"
9797
):
9898
psidx.all()

python/pyspark/sql/tests/connect/test_connect_basic.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1824,7 +1824,7 @@ def test_observe(self):
18241824

18251825
self.assert_eq(cdf, df)
18261826

1827-
self.assertEquals(cobservation.get, observation.get)
1827+
self.assertEqual(cobservation.get, observation.get)
18281828

18291829
observed_metrics = cdf.attrs["observed_metrics"]
18301830
self.assert_eq(len(observed_metrics), 1)
@@ -3449,11 +3449,11 @@ def test_can_create_multiple_sessions_to_different_remotes(self):
34493449
self.assertIsNotNone(self.spark._client)
34503450
# Creates a new remote session.
34513451
other = PySparkSession.builder.remote("sc://other.remote:114/").create()
3452-
self.assertNotEquals(self.spark, other)
3452+
self.assertNotEqual(self.spark, other)
34533453

34543454
# Gets currently active session.
34553455
same = PySparkSession.builder.remote("sc://other.remote.host:114/").getOrCreate()
3456-
self.assertEquals(other, same)
3456+
self.assertEqual(other, same)
34573457
same.release_session_on_close = False # avoid sending release to dummy connection
34583458
same.stop()
34593459

python/pyspark/sql/tests/connect/test_connect_column.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ def test_simple_binary_expressions(self):
379379
self.assertEqual(len(pdf.index), 4)
380380

381381
res = pd.DataFrame(data={"id": [0, 30, 60, 90]})
382-
self.assert_(pdf.equals(res), f"{pdf.to_string()} != {res.to_string()}")
382+
self.assertTrue(pdf.equals(res), f"{pdf.to_string()} != {res.to_string()}")
383383

384384
def test_literal_with_acceptable_type(self):
385385
for value, dataType in [

python/pyspark/sql/tests/pandas/test_pandas_map.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ def func(iterator):
394394

395395
for offheap in ["true", "false"]:
396396
with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}):
397-
self.assertEquals(
397+
self.assertEqual(
398398
self.spark.read.parquet(path).mapInPandas(func, "id long").head(), Row(0)
399399
)
400400
finally:

python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def test_input_nested_structs(self):
181181

182182
mirror = pandas_udf(lambda s: s, df.dtypes[0][1])
183183

184-
self.assertEquals(
184+
self.assertEqual(
185185
df.select(mirror(df.struct).alias("res")).first(),
186186
Row(
187187
res=Row(
@@ -194,13 +194,13 @@ def test_input_nested_maps(self):
194194
df = self.df_with_nested_maps
195195

196196
str_repr = pandas_udf(lambda s: s.astype(str), StringType())
197-
self.assertEquals(
197+
self.assertEqual(
198198
df.select(str_repr(df.attributes).alias("res")).first(),
199199
Row(res="{'personal': {'name': 'John', 'city': 'New York'}}"),
200200
)
201201

202202
extract_name = pandas_udf(lambda s: s.apply(lambda x: x["personal"]["name"]), StringType())
203-
self.assertEquals(
203+
self.assertEqual(
204204
df.select(extract_name(df.attributes).alias("res")).first(),
205205
Row(res="John"),
206206
)
@@ -209,7 +209,7 @@ def test_input_nested_arrays(self):
209209
df = self.df_with_nested_arrays
210210

211211
str_repr = pandas_udf(lambda s: s.astype(str), StringType())
212-
self.assertEquals(
212+
self.assertEqual(
213213
df.select(str_repr(df.nested_array).alias("res")).first(),
214214
Row(res="[array([1, 2, 3], dtype=int32) array([4, 5], dtype=int32)]"),
215215
)
@@ -1450,9 +1450,7 @@ def udf(x):
14501450

14511451
for offheap in ["true", "false"]:
14521452
with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}):
1453-
self.assertEquals(
1454-
self.spark.read.parquet(path).select(udf("id")).head(), Row(0)
1455-
)
1453+
self.assertEqual(self.spark.read.parquet(path).select(udf("id")).head(), Row(0))
14561454
finally:
14571455
shutil.rmtree(path)
14581456

python/pyspark/sql/tests/streaming/test_streaming.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def test_streaming_query_functions_basic(self):
3636
.start()
3737
)
3838
try:
39-
self.assertEquals(query.name, "test_streaming_query_functions_basic")
39+
self.assertEqual(query.name, "test_streaming_query_functions_basic")
4040
self.assertTrue(isinstance(query.id, str))
4141
self.assertTrue(isinstance(query.runId, str))
4242
self.assertTrue(query.isActive)

0 commit comments

Comments
 (0)