From bc7cda5a336b03c2bb740e8682b042b23257e6a4 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 11 Sep 2025 18:22:09 -0700 Subject: [PATCH 1/3] improve initcap test --- docs/source/user-guide/latest/expressions.md | 2 +- .../comet/CometStringExpressionSuite.scala | 23 +++++++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index 5f7beb42bc..d47cd89abc 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -68,7 +68,7 @@ incompatible expressions. | ConcatWs | Yes | | | Contains | Yes | | | EndsWith | Yes | | -| InitCap | No | Requires `spark.comet.exec.initCap.enabled=true` | +| InitCap | No | Behavior is different in some cases, such as hyphenated names. | Length | Yes | | | Like | Yes | | | Lower | No | Results can vary depending on locale and character set. Requires `spark.comet.caseConversion.enabled=true` | diff --git a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala index 4c0f80ddb7..b24dfed499 100644 --- a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala @@ -95,15 +95,28 @@ class CometStringExpressionSuite extends CometTestBase { } } - test("InitCap") { + test("InitCap compatible cases") { val table = "names" withTable(table) { sql(s"create table $table(id int, name varchar(20)) using parquet") + withSQLConf(CometConf.getExprAllowIncompatConfigKey("InitCap") -> "true") { + sql( + s"insert into $table values(1, 'james smith'), (2, 'michael rose'), " + + "(3, 'robert williams'), (4, 'rames rose'), (5, 'james smith'), " + + "(7, 'james ähtäri')") + checkSparkAnswerAndOperator(s"SELECT initcap(name) FROM $table") + } + } + } + + test("InitCap incompatible cases") { + val table = "names" + withTable(table) { + sql(s"create table $table(id int, name varchar(20)) using parquet") + // Comet and Spark differ on hyphenated names sql( - s"insert into $table values(1, 'james smith'), (2, 'michael rose'), " + - "(3, 'robert williams'), (4, 'rames rose'), (5, 'james smith'), " + - "(6, 'robert rose-smith'), (7, 'james ähtäri')") - checkSparkAnswer(s"SELECT initcap(name) FROM $table") + s"insert into $table values(6, 'robert rose-smith')") + checkSparkAnswer(s"SELECT initcap(name) FROM $table") } } From 7d09ed0293136b637316d636dc0c25b4a454749e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 11 Sep 2025 18:22:24 -0700 Subject: [PATCH 2/3] improve initcap test --- docs/source/user-guide/latest/expressions.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index d47cd89abc..5a459866ef 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -24,13 +24,13 @@ natively in Comet and provide the same results as Spark, or will fall back to Sp be compatible. All expressions are enabled by default, but can be disabled by setting -`spark.comet.expression.EXPRNAME.enabled=false`, where `EXPRNAME` is the expression name as specified in +`spark.comet.expression.EXPRNAME.enabled=false`, where `EXPRNAME` is the expression name as specified in the following tables, such as `Length`, or `StartsWith`. Expressions that are not Spark-compatible will fall back to Spark by default and can be enabled by setting `spark.comet.expression.EXPRNAME.allowIncompatible=true`. -It is also possible to specify `spark.comet.expression.allowIncompatible=true` to enable all +It is also possible to specify `spark.comet.expression.allowIncompatible=true` to enable all incompatible expressions. ## Conditional Expressions @@ -68,7 +68,7 @@ incompatible expressions. | ConcatWs | Yes | | | Contains | Yes | | | EndsWith | Yes | | -| InitCap | No | Behavior is different in some cases, such as hyphenated names. +| InitCap | No | Behavior is different in some cases, such as hyphenated names. | | Length | Yes | | | Like | Yes | | | Lower | No | Results can vary depending on locale and character set. Requires `spark.comet.caseConversion.enabled=true` | From ed92f6a30b2af0965f3f8f10bfc91aa91964021f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 11 Sep 2025 18:47:01 -0700 Subject: [PATCH 3/3] format --- .../scala/org/apache/comet/CometStringExpressionSuite.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala index b24dfed499..44d40cf1c1 100644 --- a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala @@ -114,9 +114,8 @@ class CometStringExpressionSuite extends CometTestBase { withTable(table) { sql(s"create table $table(id int, name varchar(20)) using parquet") // Comet and Spark differ on hyphenated names - sql( - s"insert into $table values(6, 'robert rose-smith')") - checkSparkAnswer(s"SELECT initcap(name) FROM $table") + sql(s"insert into $table values(6, 'robert rose-smith')") + checkSparkAnswer(s"SELECT initcap(name) FROM $table") } }