
Commit 8549ab6

merge upstream/master

2 parents: 30ea61a + 0534f98

121 files changed: +3195 -1000 lines

.github/workflows/benchmark.yml

Lines changed: 1 addition & 0 deletions
@@ -136,6 +136,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
+      max-parallel: 20
       matrix:
         split: ${{fromJSON(needs.matrix-gen.outputs.matrix)}}
     env:
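
This commit adds "max-parallel: 20" under "strategy" in several matrix jobs here and below. For reference, a minimal standalone sketch (a hypothetical workflow, not part of this commit) of what the key does: GitHub Actions caps how many matrix jobs run concurrently and queues the rest.

# Hypothetical demo workflow: at most 2 of the 5 "split" jobs run at once.
jobs:
  demo:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      max-parallel: 2
      matrix:
        split: [1, 2, 3, 4, 5]
    steps:
      - run: echo "running split ${{ matrix.split }}"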

.github/workflows/build_and_test.yml

Lines changed: 10 additions & 3 deletions
@@ -105,6 +105,8 @@ jobs:
           buf=true
           ui=true
           docs=true
+          java17=true
+          java25=true
         else
           pyspark_install=false
           pandas=false
@@ -116,6 +118,8 @@ jobs:
           buf=false
           ui=false
           docs=false
+          java17=false
+          java25=false
         fi
         build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,utils-java,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,profiler,protobuf,yarn,connect,sql,hive,pipelines"`
         precondition="
@@ -128,8 +132,8 @@ jobs:
           \"tpcds-1g\": \"$tpcds\",
           \"docker-integration-tests\": \"$docker\",
           \"lint\" : \"true\",
-          \"java17\" : \"$build\",
-          \"java25\" : \"$build\",
+          \"java17\" : \"$java17\",
+          \"java25\" : \"$java25\",
           \"docs\" : \"$docs\",
           \"yarn\" : \"$yarn\",
           \"k8s-integration-tests\" : \"$kubernetes\",
@@ -238,6 +242,7 @@ jobs:
     timeout-minutes: 150
     strategy:
       fail-fast: false
+      max-parallel: 20
       matrix:
         java:
           - ${{ inputs.java }}
@@ -522,6 +527,7 @@ jobs:
           --security-opt seccomp=unconfined
     strategy:
       fail-fast: false
+      max-parallel: 20
       matrix:
         java:
           - ${{ inputs.java }}
@@ -626,6 +632,7 @@ jobs:
         env: ${{ fromJSON(inputs.envs) }}
         shell: 'script -q -e -c "bash {0}"'
         run: |
+          uname -a
           for py in $(echo $PYTHON_TO_TEST | tr "," "\n")
           do
             $py --version
@@ -667,7 +674,7 @@ jobs:
           verbose: true
       - name: Upload test results to Codecov
         env: ${{ fromJSON(inputs.envs) }}
-        if: (!cancelled()) && github.repository == 'apache/spark'
+        if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
        uses: codecov/codecov-action@v5
         with:
           report_type: 'test_results'
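
The precondition change above gives java17 and java25 their own flags instead of reusing the general $build flag, so the Java version jobs can be toggled independently. A hedged sketch of how a JSON precondition like this is typically consumed downstream (the job and output names here are assumptions for illustration, not verbatim from this workflow):

# Hypothetical gating job: parse the JSON map emitted by the precondition
# step and run only when this job's own flag is 'true'.
jobs:
  java17:
    needs: precondition
    if: fromJSON(needs.precondition.outputs.required).java17 == 'true'
    runs-on: ubuntu-latest
    steps:
      - run: echo "Java 17 build enabled independently of the general build flag"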

.github/workflows/build_main.yml

Lines changed: 0 additions & 2 deletions
@@ -30,5 +30,3 @@ jobs:
       packages: write
     name: Run
     uses: ./.github/workflows/build_and_test.yml
-    secrets:
-      codecov_token: ${{ secrets.CODECOV_TOKEN }}

.github/workflows/build_python_3.13_nogil.yml

Lines changed: 0 additions & 48 deletions
This file was deleted.

.github/workflows/labeler.yml

Lines changed: 1 addition & 154 deletions
@@ -24,169 +24,16 @@
 # See also https://github.community/t/specify-check-suite-when-creating-a-checkrun/118380/10

 name: "On pull requests"
-on:
-  pull_request_target:
-    types: [opened, edited, reopened]
+on: pull_request_target

 jobs:
   label:
     name: Label pull requests
     runs-on: ubuntu-latest
     permissions:
       contents: read
-      pull-requests: write
     steps:
       - uses: actions/labeler@v5
         with:
           repo-token: "${{ secrets.GITHUB_TOKEN }}"
           sync-labels: true
-
-  jira-info:
-    name: Comment JIRA information
-    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: write
-    steps:
-      - name: Extract JIRA IDs and comment
-        uses: actions/github-script@v7
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            const prTitle = context.payload.pull_request.title;
-            const prNumber = context.payload.pull_request.number;
-
-            // Extract JIRA IDs from PR title
-            const jiraIdRegex = /\bSPARK-\d+\b/g;
-            const jiraIds = prTitle.match(jiraIdRegex);
-
-            // If no JIRA IDs found, check for [MINOR] tag
-            if (!jiraIds || jiraIds.length === 0) {
-              const minorRegex = /^\[MINOR\]/i;
-              if (minorRegex.test(prTitle)) {
-                console.log('PR title has [MINOR] tag, skipping');
-                return;
-              }
-
-              // Post reminder comment
-              const reminderComment = `## ⚠️ Pull Request Title Validation\n\nThis pull request title does not contain a JIRA issue ID.\n\nPlease update the title to either:\n- Include a JIRA ID: \`[SPARK-12345] Your description\`\n- Mark as minor change: \`[MINOR] Your description\`\n\nFor minor changes that don't require a JIRA ticket (e.g., typo fixes), please prefix the title with \`[MINOR]\`.\n\n---\n*This comment was automatically generated by GitHub Actions*`;
-
-              const comments = await github.rest.issues.listComments({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: prNumber
-              });
-
-              const botComment = comments.data.find(comment =>
-                comment.user.type === 'Bot' &&
-                (comment.body.includes('## JIRA Issue Information') || comment.body.includes('## ⚠️ Pull Request Title Validation'))
-              );
-
-              if (botComment) {
-                await github.rest.issues.updateComment({
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
-                  comment_id: botComment.id,
-                  body: reminderComment
-                });
-                console.log('Updated reminder comment');
-              } else {
-                await github.rest.issues.createComment({
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
-                  issue_number: prNumber,
-                  body: reminderComment
-                });
-                console.log('Created reminder comment');
-              }
-              return;
-            }
-
-            // Remove duplicates
-            const uniqueJiraIds = [...new Set(jiraIds)];
-            console.log(`Found JIRA IDs: ${uniqueJiraIds.join(', ')}`);
-
-            // Fetch JIRA information for each ID
-            const jiraBaseUrl = 'https://issues.apache.org/jira';
-            const jiraInfos = [];
-
-            for (const jiraId of uniqueJiraIds) {
-              try {
-                const response = await fetch(`${jiraBaseUrl}/rest/api/2/issue/${jiraId}`);
-
-                if (!response.ok) {
-                  jiraInfos.push({
-                    id: jiraId,
-                    type: 'Unknown',
-                    error: `Failed to fetch (HTTP ${response.status})`
-                  });
-                  continue;
-                }
-
-                const data = await response.json();
-                const fields = data.fields;
-
-                jiraInfos.push({
-                  id: jiraId,
-                  type: fields.issuetype?.name || 'Unknown',
-                  summary: fields.summary || 'N/A',
-                  assignee: fields.assignee ? fields.assignee.displayName : 'None',
-                  status: fields.status ? fields.status.name : 'Unknown',
-                  affected: fields.versions ? fields.versions.map(v => v.name) : []
-                });
-              } catch (error) {
-                console.error(`Error fetching ${jiraId}:`, error);
-                jiraInfos.push({
-                  id: jiraId,
-                  type: 'Unknown',
-                  error: error.message
-                });
-              }
-            }
-
-            // Format comment
-            let commentBody = '## JIRA Issue Information\n\n';
-
-            for (const info of jiraInfos) {
-              if (info.error) {
-                commentBody += `=== ${info.type} ${info.id} ===\n`;
-                commentBody += `Error: ${info.error}\n\n`;
-              } else {
-                commentBody += `=== ${info.type} ${info.id} ===\n`;
-                commentBody += `Summary: ${info.summary}\n`;
-                commentBody += `Assignee: ${info.assignee}\n`;
-                commentBody += `Status: ${info.status}\n`;
-                commentBody += `Affected: ${JSON.stringify(info.affected)}\n\n`;
-              }
-            }
-
-            commentBody += '---\n*This comment was automatically generated by GitHub Actions*';
-
-            // Check if there's an existing comment from this action
-            const comments = await github.rest.issues.listComments({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: prNumber
-            });
-
-            const botComment = comments.data.find(comment =>
-              comment.user.type === 'Bot' &&
-              (comment.body.includes('## JIRA Issue Information') || comment.body.includes('## ⚠️ Pull Request Title Validation'))
-            );
-
-            if (botComment) {
-              await github.rest.issues.updateComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: botComment.id,
-                body: commentBody
-              });
-              console.log('Updated existing comment');
-            } else {
-              await github.rest.issues.createComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: prNumber,
-                body: commentBody
-              });
-              console.log('Created new comment');
-            }

.github/workflows/maven_test.yml

Lines changed: 1 addition & 0 deletions
@@ -60,6 +60,7 @@ jobs:
     # timeout-minutes: 150
     strategy:
       fail-fast: false
+      max-parallel: 20
       matrix:
         java:
           - ${{ inputs.java }}

.github/workflows/publish_snapshot.yml

Lines changed: 1 addition & 0 deletions
@@ -36,6 +36,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
+      max-parallel: 20
       matrix:
         # keep in sync with default value of workflow_dispatch input 'branch'
         branch: ${{ fromJSON( inputs.branch || '["master", "branch-4.1", "branch-4.0", "branch-3.5"]' ) }}

.github/workflows/python_hosted_runner_test.yml

Lines changed: 1 addition & 10 deletions
@@ -63,6 +63,7 @@ jobs:
     # timeout-minutes: 150
     strategy:
       fail-fast: false
+      max-parallel: 20
       matrix:
         java:
           - ${{ inputs.java }}
@@ -166,16 +167,6 @@ jobs:
             echo "Python Packaging Tests Enabled!"
           fi
           ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --python-executables "$PYTHON_TO_TEST"
-      - name: Upload test results to Codecov
-        env: ${{ fromJSON(inputs.envs) }}
-        if: (!cancelled()) && github.repository == 'apache/spark'
-        uses: codecov/codecov-action@v5
-        with:
-          report_type: 'test_results'
-          files: '**/target/test-reports/*.xml'
-          flags: ${{ env.PYTHON_TO_TEST }}-${{ inputs.branch }}-${{ inputs.os }}
-          name: PySpark-Test-Results
-          token: ${{ secrets.codecov_token }}
       - name: Upload test results to report
         env: ${{ fromJSON(inputs.envs) }}
         if: always()

BUG_TICKET.txt

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+Title: [PySpark] Consolidate per-eval-type logic in Python worker UDF execution
+Type: Improvement
+
+In the current Python worker UDF execution path, the logic for each {{PythonEvalType}} is spread across three layers:
+
+- *Wrapper functions* (e.g. {{wrap_arrow_batch_iter_udf}}, {{wrap_scalar_arrow_udf}}, {{wrap_grouped_map_arrow_udf}}, etc.) that pre-process UDF arguments and post-process results.
+- *Mapper blocks* in {{read_udfs}} that handle input column selection, UDF invocation, and output assembly — often shared across unrelated eval types.
+- *Serializers* (e.g. {{ArrowStreamUDFSerializer}}) that mix data transformation (struct flatten/wrap, pandas conversion) with I/O concerns.
+
+To understand the full data flow for a single eval type, one has to trace through all three layers and reconstruct the implicit contracts between them (e.g. the serializer yields {{[batch]}} lists, the mapper indexes into them, the wrapper attaches {{(result, type)}} tuples).
+
+The goal is to make each eval type self-contained: all input transformation, UDF invocation, result verification, and output transformation should be co-located in one place in {{read_udfs}}, with serializers reduced to pure I/O.
+
+This can be done incrementally, one eval type at a time, starting with the simpler Arrow-based ones.
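
The end state the ticket describes is easiest to see in code. A minimal sketch of one self-contained eval-type pipeline with the serializer reduced to pure I/O; run_arrow_batch_udf and verify_result are hypothetical names for illustration, not existing PySpark APIs:

# Sketch only: co-locates input column selection, UDF invocation, result
# verification, and output assembly for a single eval type, as the ticket
# proposes for read_udfs.
import pyarrow as pa

def verify_result(result):
    # Stand-in for the type/shape validation the wrapper layer does today.
    if not isinstance(result, pa.Array):
        raise TypeError(f"expected pyarrow.Array, got {type(result)}")

def run_arrow_batch_udf(udf, arg_offsets, batch_iter):
    # batch_iter yields plain RecordBatches from a serializer that only does I/O.
    for batch in batch_iter:
        args = [batch.column(o) for o in arg_offsets]   # input column selection
        result = udf(*args)                             # UDF invocation
        verify_result(result)                           # result verification
        yield pa.RecordBatch.from_arrays([result], ["_0"])  # output assembly

With each eval type expressed as one such function inside read_udfs, the implicit contracts between serializer, mapper, and wrapper disappear, which is the goal stated above.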

R/run-tests.sh

Lines changed: 2 additions & 2 deletions
@@ -30,9 +30,9 @@ if [[ $(echo $SPARK_AVRO_JAR_PATH | wc -l) -eq 1 ]]; then
 fi

 if [ -z "$SPARK_JARS" ]; then
-  SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+  SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Xss4M" --conf spark.executor.extraJavaOptions="-Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
 else
-  SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+  SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Xss4M" --conf spark.executor.extraJavaOptions="-Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
 fi

 FAILED=$((PIPESTATUS[0]||$FAILED))
