Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions .github/workflows/velox_backend_arm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,6 @@ jobs:
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
- name: Run TPC-H / TPC-DS with RAS
run: |
echo "JAVA_HOME: $JAVA_HOME"
cd $GITHUB_WORKSPACE/tools/gluten-it
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true

cpp-test-udf-test:
runs-on: ubuntu-24.04-arm
Expand Down
117 changes: 0 additions & 117 deletions .github/workflows/velox_backend_x86.yml
Original file line number Diff line number Diff line change
Expand Up @@ -321,20 +321,6 @@ jobs:
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
- name: Run TPC-H / TPC-DS with RAS
run: |
echo "JAVA_HOME: $JAVA_HOME"
cd $GITHUB_WORKSPACE/tools/gluten-it
SPARK41_CONF=""
if [ "${{ matrix.spark }}" = "spark-4.1" ]; then
SPARK41_CONF="--extra-conf=spark.sql.unionOutputPartitioning=false"
fi
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true $SPARK41_CONF \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true $SPARK41_CONF

tpc-test-centos7:
needs: build-native-lib-centos-7
Expand Down Expand Up @@ -382,15 +368,6 @@ jobs:
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1

# Run TPC-H / TPC-DS with RAS
cd /work/tools/gluten-it
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true
"

tpc-test-ubuntu-oom:
Expand Down Expand Up @@ -1044,100 +1021,6 @@ jobs:
**/gluten-ut/**/hs_err_*.log
**/gluten-ut/**/core.*

spark-test-spark35-ras:
needs: build-native-lib-centos-7
runs-on: ubuntu-22.04
env:
SPARK_TESTING: true
container: apache/gluten:centos-8-jdk8
steps:
- uses: actions/checkout@v4
- name: Download All Artifacts
uses: actions/download-artifact@v4
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/
- name: Download Arrow Jars
uses: actions/download-artifact@v4
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare
run: |
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools==77.0.3 && \
pip3 install pyspark==3.5.5 cython && \
pip3 install pandas==2.2.3 pyarrow==20.0.0
- name: Build and Run unit test for Spark 3.5.5 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
yum install -y java-17-openjdk-devel
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-test-log
path: |
**/target/*.log
**/gluten-ut/**/hs_err_*.log
**/gluten-ut/**/core.*

spark-test-spark35-slow-ras:
needs: build-native-lib-centos-7
runs-on: ubuntu-22.04
env:
SPARK_TESTING: true
container: apache/gluten:centos-8-jdk8
steps:
- uses: actions/checkout@v4
- name: Download All Artifacts
uses: actions/download-artifact@v4
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/
- name: Download Arrow Jars
uses: actions/download-artifact@v4
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Build and Run unit test for Spark 3.5.5 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
yum install -y java-17-openjdk-devel
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Ppaimon -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-test-log
path: |
**/target/*.log
**/gluten-ut/**/hs_err_*.log
**/gluten-ut/**/core.*

spark-test-spark35-smj:
needs: build-native-lib-centos-7
runs-on: ubuntu-22.04
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,13 @@ import org.apache.gluten.backendsapi.clickhouse.CHBackend
import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.execution.OffloadDeltaNode
import org.apache.gluten.extension.{DeltaPostTransformRules, OffloadDeltaFilter, OffloadDeltaProject}
import org.apache.gluten.extension.columnar.enumerated.RasOffload
import org.apache.gluten.extension.columnar.heuristic.HeuristicTransform
import org.apache.gluten.extension.columnar.validator.Validators
import org.apache.gluten.extension.injector.Injector
import org.apache.gluten.sql.shims.DeltaShimLoader

import org.apache.spark.SparkContext
import org.apache.spark.api.plugin.PluginContext
import org.apache.spark.sql.execution.{FilterExec, ProjectExec}
import org.apache.spark.util.SparkReflectionUtil

class CHDeltaComponent extends Component {
Expand All @@ -45,27 +43,14 @@ class CHDeltaComponent extends Component {

override def injectRules(injector: Injector): Unit = {
val legacy = injector.gluten.legacy
val ras = injector.gluten.ras
legacy.injectTransform {
c =>
val offload = Seq(OffloadDeltaNode(), OffloadDeltaProject(), OffloadDeltaFilter())
HeuristicTransform.Simple(
Validators.newValidator(new GlutenConfig(c.sqlConf), offload),
offload)
}
val offloads: Seq[RasOffload] = Seq(
RasOffload.from[ProjectExec](OffloadDeltaProject()),
RasOffload.from[FilterExec](OffloadDeltaFilter())
)
offloads.foreach(
offload =>
ras.injectRasRule(
c => RasOffload.Rule(offload, Validators.newValidator(new GlutenConfig(c.sqlConf)), Nil)))
DeltaPostTransformRules.rules.foreach {
r =>
legacy.injectPostTransform(_ => r)
ras.injectPostTransform(_ => r)
}
DeltaPostTransformRules.rules.foreach(r => legacy.injectPostTransform(_ => r))

DeltaShimLoader.getDeltaShims.registerExpressionExtension()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import org.apache.gluten.extension.columnar.rewrite._
import org.apache.gluten.extension.columnar.transition.{InsertTransitions, RemoveTransitions}
import org.apache.gluten.extension.columnar.validator.{Validator, Validators}
import org.apache.gluten.extension.injector.{Injector, SparkInjector}
import org.apache.gluten.extension.injector.GlutenInjector.{LegacyInjector, RasInjector}
import org.apache.gluten.extension.injector.GlutenInjector.LegacyInjector
import org.apache.gluten.parser.{GlutenCacheFilesSqlParser, GlutenClickhouseSqlParser}
import org.apache.gluten.sql.shims.SparkShimLoader

Expand All @@ -46,15 +46,14 @@ class CHRuleApi extends RuleApi {
override def injectRules(injector: Injector): Unit = {
injectSpark(injector.spark)
injectLegacy(injector.gluten.legacy)
injectRas(injector.gluten.ras)
}
}

object CHRuleApi {

/**
* Registers Spark rules or extensions, except for Gluten's columnar rules that are supposed to be
* injected through [[injectLegacy]] / [[injectRas]].
* injected through [[injectLegacy]].
*/
private def injectSpark(injector: SparkInjector): Unit = {
// Inject the regular Spark rules directly.
Expand Down Expand Up @@ -164,22 +163,6 @@ object CHRuleApi {
injector.injectFinal(_ => RemoveFallbackTagRule())
}

/**
* Registers Gluten's columnar rules. These rules will be executed only when RAS (relational
* algebra selector) is enabled by spark.gluten.ras.enabled=true.
*
* These rules are covered by CI test job spark-test-spark35-ras.
*/
private def injectRas(injector: RasInjector): Unit = {
// CH backend doesn't work with RAS at the moment. Inject a rule that aborts any
// execution calls.
injector.injectPreTransform(
_ =>
new SparkPlanRules.AbortRule(
"Clickhouse backend doesn't yet have RAS support, please try disabling RAS and" +
" rerunning the application"))
}

/**
* Since https://github.com/apache/incubator-gluten/pull/883.
*
Expand Down
7 changes: 0 additions & 7 deletions backends-velox/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,6 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-ras-common</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@ package org.apache.gluten.component
import org.apache.gluten.backendsapi.velox.VeloxBackend
import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.extension.{DeltaPostTransformRules, OffloadDeltaFilter, OffloadDeltaProject, OffloadDeltaScan}
import org.apache.gluten.extension.columnar.enumerated.RasOffload
import org.apache.gluten.extension.columnar.heuristic.HeuristicTransform
import org.apache.gluten.extension.columnar.validator.Validators
import org.apache.gluten.extension.injector.Injector

import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec}
import org.apache.spark.util.SparkReflectionUtil

class VeloxDeltaComponent extends Component {
Expand All @@ -38,7 +36,6 @@ class VeloxDeltaComponent extends Component {

override def injectRules(injector: Injector): Unit = {
val legacy = injector.gluten.legacy
val ras = injector.gluten.ras
legacy.injectTransform {
c =>
val offload = Seq(OffloadDeltaScan(), OffloadDeltaProject(), OffloadDeltaFilter())
Expand All @@ -47,19 +44,6 @@ class VeloxDeltaComponent extends Component {
Validators.newValidator(new GlutenConfig(c.sqlConf), offload),
offload)
}
val offloads: Seq[RasOffload] = Seq(
RasOffload.from[FileSourceScanExec](OffloadDeltaScan()),
RasOffload.from[ProjectExec](OffloadDeltaProject()),
RasOffload.from[FilterExec](OffloadDeltaFilter())
)
offloads.foreach(
offload =>
ras.injectRasRule(
c => RasOffload.Rule(offload, Validators.newValidator(new GlutenConfig(c.sqlConf)), Nil)))
DeltaPostTransformRules.rules.foreach {
r =>
legacy.injectPostTransform(_ => r)
ras.injectPostTransform(_ => r)
}
DeltaPostTransformRules.rules.foreach(r => legacy.injectPostTransform(_ => r))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,11 @@
package org.apache.gluten.component

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.extension.columnar.enumerated.RasOffload
import org.apache.gluten.extension.columnar.heuristic.HeuristicTransform
import org.apache.gluten.extension.columnar.validator.Validators
import org.apache.gluten.extension.injector.Injector

import org.apache.spark.sql.execution.command.ExecutedCommandExec
import org.apache.spark.sql.execution.datasources.v2.{LeafV2CommandExec, OffloadDeltaCommand}
import org.apache.spark.sql.execution.datasources.v2.OffloadDeltaCommand

class VeloxDelta33WriteComponent extends Component {
override def name(): String = "velox-delta33-write"
Expand All @@ -32,7 +30,6 @@ class VeloxDelta33WriteComponent extends Component {

override def injectRules(injector: Injector): Unit = {
val legacy = injector.gluten.legacy
val ras = injector.gluten.ras
legacy.injectTransform {
c =>
val offload = Seq(
Expand All @@ -42,13 +39,5 @@ class VeloxDelta33WriteComponent extends Component {
Validators.newValidator(new GlutenConfig(c.sqlConf), offload),
offload)
}
val offloads: Seq[RasOffload] = Seq(
RasOffload.from[ExecutedCommandExec](OffloadDeltaCommand()),
RasOffload.from[LeafV2CommandExec](OffloadDeltaCommand())
)
offloads.foreach(
offload =>
ras.injectRasRule(
c => RasOffload.Rule(offload, Validators.newValidator(new GlutenConfig(c.sqlConf)), Nil)))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@ package org.apache.gluten.component
import org.apache.gluten.backendsapi.velox.VeloxBackend
import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.execution.OffloadHudiScan
import org.apache.gluten.extension.columnar.enumerated.RasOffload
import org.apache.gluten.extension.columnar.heuristic.HeuristicTransform
import org.apache.gluten.extension.columnar.validator.Validators
import org.apache.gluten.extension.injector.Injector

import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.util.SparkReflectionUtil

class VeloxHudiComponent extends Component {
Expand All @@ -38,20 +36,12 @@ class VeloxHudiComponent extends Component {

override def injectRules(injector: Injector): Unit = {
val legacy = injector.gluten.legacy
val ras = injector.gluten.ras
legacy.injectTransform {
c =>
val offload = Seq(OffloadHudiScan()).map(_.toStrcitRule())
HeuristicTransform.Simple(
Validators.newValidator(new GlutenConfig(c.sqlConf), offload),
offload)
}
ras.injectRasRule {
c =>
RasOffload.Rule(
RasOffload.from[FileSourceScanExec](OffloadHudiScan()),
Validators.newValidator(new GlutenConfig(c.sqlConf)),
Nil)
}
}
}
Loading
Loading