diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml index da06bff87b..0ad40c1932 100644 --- a/.github/workflows/pr_build_macos.yml +++ b/.github/workflows/pr_build_macos.yml @@ -132,14 +132,6 @@ jobs: value: | org.apache.spark.sql.CometToPrettyStringSuite - exclude: - # Skip fuzz suite for Spark 4.0 - # https://github.com/apache/datafusion-comet/issues/2965 - - profile: - name: "Spark 4.0, JDK 17, Scala 2.13" - suite: - name: "fuzz" - fail-fast: false name: ${{ matrix.os }}/${{ matrix.profile.name }} [${{ matrix.suite.name }}] runs-on: ${{ matrix.os }} @@ -152,6 +144,14 @@ jobs: jdk-version: ${{ matrix.profile.java_version }} jdk-architecture: aarch64 protoc-architecture: aarch_64 + - name: Set thread thresholds envs for spark test on macOS + # Export lower thread thresholds for the Spark tests to avoid OOM, see: https://github.com/apache/datafusion-comet/issues/2965 + shell: bash + run: | + echo "SPARK_TEST_SQL_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=256" >> "$GITHUB_ENV" + echo "SPARK_TEST_SQL_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=256" >> "$GITHUB_ENV" + echo "SPARK_TEST_HIVE_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=48" >> "$GITHUB_ENV" + echo "SPARK_TEST_HIVE_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=48" >> "$GITHUB_ENV" - name: Java test steps uses: ./.github/actions/java-test with: diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala index 8011e5e70d..7dba24bff7 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala @@ -89,6 +89,14 @@ abstract class CometTestBase // this is an edge case, and we expect most users to allow sorts on floating point, so we // enable this for the tests conf.set(CometConf.getExprAllowIncompatConfigKey("SortOrder"), "true") + // For Spark 4.0 tests, we need to limit the thread thresholds to avoid OOM, see: + // https://github.com/apache/datafusion-comet/issues/2965 + conf.set( + 
"spark.sql.shuffleExchange.maxThreadThreshold", + sys.env.getOrElse("SPARK_TEST_SQL_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD", "1024")) + conf.set( + "spark.sql.resultQueryStage.maxThreadThreshold", + sys.env.getOrElse("SPARK_TEST_SQL_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD", "1024")) conf }