diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index fd1632fe88c09..6abb080b417df 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -41,14 +41,14 @@ on:
         description: Additional environment variables to set when running the tests. Should be in JSON format.
         required: false
         type: string
-        default: '{"PYSPARK_IMAGE_TO_TEST": "python-311", "PYTHON_TO_TEST": "python3.11"}'
+        default: '{"PYSPARK_IMAGE_TO_TEST": "pypy-310", "PYTHON_TO_TEST": "pypy3"}'
       jobs:
         description: >-
           Jobs to run, and should be in JSON format. The values should be matched with the job's key defined
           in this file, e.g., build. See precondition job below.
         required: false
         type: string
-        default: ''
+        default: '{"pyspark": "true", "pyspark-pandas": "true"}'
     secrets:
       codecov_token:
         description: The upload token of codecov.
@@ -607,7 +607,7 @@ jobs:
         run: |
           for py in $(echo $PYTHON_TO_TEST | tr "," "\n")
           do
-            echo $py
+            $py --version
             $py -m pip list
           done
       - name: Install Conda for pip packaging test
diff --git a/dev/spark-test-image/pypy-310/Dockerfile b/dev/spark-test-image/pypy-310/Dockerfile
index 73ac6aca04cf1..dbbb035b7bafc 100644
--- a/dev/spark-test-image/pypy-310/Dockerfile
+++ b/dev/spark-test-image/pypy-310/Dockerfile
@@ -50,6 +50,7 @@ RUN apt-get update && apt-get install -y \
     libssl-dev \
     libtiff5-dev \
     libxml2-dev \
+    libxslt-dev \
     openjdk-17-jdk-headless \
     pkg-config \
     qpdf \
@@ -63,9 +64,9 @@ RUN apt-get update && apt-get install -y \

 RUN add-apt-repository ppa:pypy/ppa

-RUN mkdir -p /usr/local/pypy/pypy3.10 && \
-    curl -sqL https://downloads.python.org/pypy/pypy3.10-v7.3.17-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.10 --strip-components=1 && \
-    ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3.10 && \
-    ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3
+RUN mkdir -p /usr/local/pypy/pypy3.11 && \
+    curl -sqL https://downloads.python.org/pypy/pypy3.11-v7.3.20-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.11 --strip-components=1 && \
+    ln -sf /usr/local/pypy/pypy3.11/bin/pypy /usr/local/bin/pypy3.10 && \
+    ln -sf /usr/local/pypy/pypy3.11/bin/pypy /usr/local/bin/pypy3
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
 RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.3.1' scipy coverage matplotlib lxml
diff --git a/python/pyspark/sql/tests/streaming/test_streaming.py b/python/pyspark/sql/tests/streaming/test_streaming.py
index a0e85c73aedf6..89864f50568be 100644
--- a/python/pyspark/sql/tests/streaming/test_streaming.py
+++ b/python/pyspark/sql/tests/streaming/test_streaming.py
@@ -16,9 +16,11 @@
 #
 import os
+import platform
 import shutil
 import tempfile
 import time
+import unittest

 from pyspark.sql import Row
 from pyspark.sql.functions import lit
@@ -503,6 +505,9 @@ def test_streaming_drop_duplicate_within_watermark(self):
         self.assertTrue(len(result) >= 6 and len(result) <= 9)


+@unittest.skipIf(
+    "pypy" in platform.python_implementation().lower(), "cannot run in environment pypy"
+)
 class StreamingTests(StreamingTestsMixin, ReusedSQLTestCase):
     def _assert_exception_tree_contains_msg(self, exception, msg):
         e = exception
@@ -514,7 +519,6 @@ def _assert_exception_tree_contains_msg(self, exception, msg):


 if __name__ == "__main__":
-    import unittest
     from pyspark.sql.tests.streaming.test_streaming import *  # noqa: F401

     try:
diff --git a/python/pyspark/sql/tests/streaming/test_streaming_foreach.py b/python/pyspark/sql/tests/streaming/test_streaming_foreach.py
index b29338e7f59e7..4b11aa3bdf1b2 100644
--- a/python/pyspark/sql/tests/streaming/test_streaming_foreach.py
+++ b/python/pyspark/sql/tests/streaming/test_streaming_foreach.py
@@ -16,7 +16,9 @@
 #
 import os
+import platform
 import tempfile
+import unittest

 from pyspark.testing.sqlutils import ReusedSQLTestCase

@@ -278,12 +280,14 @@ class WriterWithNonCallableClose(WithProcess):
         tester.assert_invalid_writer(WriterWithNonCallableClose(), "ATTRIBUTE_NOT_CALLABLE")


+@unittest.skipIf(
+    "pypy" in platform.python_implementation().lower(), "cannot run in environment pypy"
+)
 class StreamingTestsForeach(StreamingTestsForeachMixin, ReusedSQLTestCase):
     pass


 if __name__ == "__main__":
-    import unittest
     from pyspark.sql.tests.streaming.test_streaming_foreach import *  # noqa: F401

     try:
diff --git a/python/pyspark/streaming/tests/test_context.py b/python/pyspark/streaming/tests/test_context.py
index f3fc501a57fca..7800591e70afa 100644
--- a/python/pyspark/streaming/tests/test_context.py
+++ b/python/pyspark/streaming/tests/test_context.py
@@ -15,14 +15,19 @@
 # limitations under the License.
 #
 import os
+import platform
 import struct
 import tempfile
 import time
+import unittest

 from pyspark.streaming import StreamingContext
 from pyspark.testing.streamingutils import PySparkStreamingTestCase


+@unittest.skipIf(
+    "pypy" in platform.python_implementation().lower(), "cannot run in environment pypy"
+)
 class StreamingContextTests(PySparkStreamingTestCase):
     duration = 0.1
     setupCalled = False
@@ -171,7 +176,6 @@ def test_await_termination_or_timeout(self):


 if __name__ == "__main__":
-    import unittest
     from pyspark.streaming.tests.test_context import *  # noqa: F401

     try:
diff --git a/python/pyspark/streaming/tests/test_listener.py b/python/pyspark/streaming/tests/test_listener.py
index 7769f3bedaad9..c89f455dee9b7 100644
--- a/python/pyspark/streaming/tests/test_listener.py
+++ b/python/pyspark/streaming/tests/test_listener.py
@@ -14,10 +14,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
+import platform
+import unittest
+
 from pyspark.streaming import StreamingListener
 from pyspark.testing.streamingutils import PySparkStreamingTestCase


+@unittest.skipIf(
+    "pypy" in platform.python_implementation().lower(), "cannot run in environment pypy"
+)
 class StreamingListenerTests(PySparkStreamingTestCase):
     duration = 0.5
@@ -147,7 +154,6 @@ def func(dstream):


 if __name__ == "__main__":
-    import unittest
     from pyspark.streaming.tests.test_listener import *  # noqa: F401

     try:
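Note on the skip pattern: every test module touched above guards its test class with the same `unittest.skipIf` decorator, keyed off `platform.python_implementation()` (which returns the interpreter name, e.g. `"CPython"` or `"PyPy"`). A minimal self-contained sketch of that pattern follows; the helper name `_running_on_pypy` and the `ExampleStreamingTest` class are hypothetical, used only for illustration — the diff inlines the condition directly in each decorator.

```python
import platform
import unittest


def _running_on_pypy() -> bool:
    # platform.python_implementation() returns the interpreter name,
    # e.g. "CPython" or "PyPy"; lower-casing makes the substring check
    # robust to capitalization.
    return "pypy" in platform.python_implementation().lower()


# Skipped (not failed) when the suite runs under PyPy, mirroring the
# decorator applied to the streaming test classes in this diff.
@unittest.skipIf(_running_on_pypy(), "cannot run in environment pypy")
class ExampleStreamingTest(unittest.TestCase):
    def test_runs_only_on_cpython(self):
        self.assertNotIn("pypy", platform.python_implementation().lower())


if __name__ == "__main__":
    unittest.main()
```

Decorating at the class level keeps per-test reporting intact: under PyPy each test in the class is reported as skipped with the given reason, instead of erroring during `setUp`.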