Skip to content

Commit c9f6d45

Browse files
authored
[GLUTEN-11088][VL] Fix GlutenDatasetSuite in Spark-4.0 (#11197)
* [VL] Fix GlutenDatasetSuite in Spark-4.0
1 parent a54a803 commit c9f6d45

File tree

5 files changed

+19
-10
lines changed

5 files changed

+19
-10
lines changed

backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,4 +583,7 @@ object VeloxBackendSettings extends BackendSettingsApi {
583583
override def supportOverwriteByExpression(): Boolean = enableEnhancedFeatures()
584584

585585
override def supportOverwritePartitionsDynamic(): Boolean = enableEnhancedFeatures()
586+
587+
/** Velox does not support columnar shuffle with an empty schema. */
588+
override def supportEmptySchemaColumnarShuffle(): Boolean = false
586589
}

backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,15 @@ class VeloxValidatorApi extends ValidatorApi {
8888
outputAttributes: Seq[Attribute],
8989
outputPartitioning: Partitioning,
9090
child: SparkPlan): Option[String] = {
91-
if (outputAttributes.isEmpty) {
92-
// See: https://github.com/apache/incubator-gluten/issues/7600.
93-
return Some("Shuffle with empty output schema is not supported")
94-
}
95-
if (child.output.isEmpty) {
96-
// See: https://github.com/apache/incubator-gluten/issues/7600.
97-
return Some("Shuffle with empty input schema is not supported")
91+
if (!BackendsApiManager.getSettings.supportEmptySchemaColumnarShuffle()) {
92+
if (outputAttributes.isEmpty) {
93+
// See: https://github.com/apache/incubator-gluten/issues/7600.
94+
return Some("Shuffle with empty output schema is not supported")
95+
}
96+
if (child.output.isEmpty) {
97+
// See: https://github.com/apache/incubator-gluten/issues/7600.
98+
return Some("Shuffle with empty input schema is not supported")
99+
}
98100
}
99101
doSchemaValidate(child.schema)
100102
}

gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,4 +162,7 @@ trait BackendSettingsApi {
162162
def supportOverwriteByExpression(): Boolean = false
163163

164164
def supportOverwritePartitionsDynamic(): Boolean = false
165+
166+
/** Whether the backend supports columnar shuffle with an empty schema. */
167+
def supportEmptySchemaColumnarShuffle(): Boolean = true
165168
}

gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ case class CollectLimitTransformerRule() extends Rule[SparkPlan] {
3030
}
3131

3232
val transformed = plan.transformUp {
33-
case exec: CollectLimitExec if exec.child.supportsColumnar =>
33+
case exec: CollectLimitExec
34+
if exec.child.supportsColumnar &&
35+
(exec.child.output.nonEmpty ||
36+
BackendsApiManager.getSettings.supportEmptySchemaColumnarShuffle()) =>
3437
val offset = SparkShimLoader.getSparkShims.getCollectLimitOffset(exec)
3538
BackendsApiManager.getSparkPlanExecApiInstance
3639
.genColumnarCollectLimitExec(exec.limit, exec.child, offset)

gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -839,8 +839,6 @@ class VeloxTestSettings extends BackendTestSettings {
839839
// Rewrite the following two tests in GlutenDatasetSuite.
840840
.exclude("dropDuplicates: columns with same column name")
841841
.exclude("groupBy.as")
842-
// TODO: fix in Spark-4.0
843-
.exclude("SPARK-23627: provide isEmpty in DataSet")
844842
enableSuite[GlutenDateFunctionsSuite]
845843
// The below two are replaced by two modified versions.
846844
.exclude("unix_timestamp")

0 commit comments

Comments
 (0)