@@ -26,7 +26,7 @@ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
2626import org .apache .spark .sql .internal .SQLConf
2727import org .apache .spark .sql .types .{DataTypes , StructField , StructType }
2828
29- import org .apache .comet .serde .{CometTruncDate , CometTruncTimestamp }
29+ import org .apache .comet .serde .{CometDateFormat , CometTruncDate , CometTruncTimestamp }
3030import org .apache .comet .testing .{DataGenOptions , FuzzDataGenerator }
3131
3232class CometTemporalExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
@@ -123,6 +123,124 @@ class CometTemporalExpressionSuite extends CometTestBase with AdaptiveSparkPlanH
123123 FuzzDataGenerator .generateDataFrame(r, spark, schema, 1000 , DataGenOptions ())
124124 }
125125
test("date_format with timestamp column") {
  // Checks every format listed in CometDateFormat.supportedFormats against Spark.
  // Formats that embed a single-quoted literal (such as the ISO 'T') cannot be placed
  // inside a single-quoted SQL string, so they are passed as a double-quoted SQL string
  // literal instead of being dropped from coverage.
  val isoFormat = "yyyy-MM-dd'T'HH:mm:ss"
  val (quotedFormats, plainFormats) =
    CometDateFormat.supportedFormats.keys.toSeq.partition(_.contains("'"))

  createTimestampTestData.createOrReplaceTempView("tbl")

  withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
    for (format <- plainFormats) {
      checkSparkAnswerAndOperator(s"SELECT c0, date_format(c0, '$format') from tbl order by c0")
    }
    // The ISO format is always exercised, whether or not it appears in supportedFormats;
    // `distinct` avoids running it twice when it does.
    for (format <- (quotedFormats :+ isoFormat).distinct) {
      checkSparkAnswerAndOperator(
        s"""SELECT c0, date_format(c0, "$format") from tbl order by c0""")
    }
  }
}
142+
test("date_format with specific format strings") {
  // Runs a fixed list of format strings against the shared timestamp test view and
  // requires Comet to match Spark for each of them.
  createTimestampTestData.createOrReplaceTempView("tbl")

  // Formats without embedded quotes, in the order they are checked.
  val plainFormats = Seq(
    // Date formats
    "yyyy-MM-dd",
    "yyyy/MM/dd",
    // Time formats
    "HH:mm:ss",
    "HH:mm",
    // Combined format
    "yyyy-MM-dd HH:mm:ss",
    // Day/month names
    "EEEE",
    "MMMM",
    // 12-hour time
    "hh:mm:ss a")

  withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
    plainFormats.foreach { format =>
      checkSparkAnswerAndOperator(s"SELECT c0, date_format(c0, '$format') from tbl order by c0")
    }

    // ISO format: the pattern itself contains single quotes around the literal T, so the
    // SQL string literal is written with double quotes and needs no escaping.
    checkSparkAnswerAndOperator(
      "SELECT c0, date_format(c0, \"yyyy-MM-dd'T'HH:mm:ss\") from tbl order by c0")
  }
}
172+
test("date_format with literal timestamp") {
  // Formats a fixed TIMESTAMP literal with several patterns.
  val formats = Seq("yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss", "EEEE", "hh:mm:ss a")

  // Disable constant folding to ensure Comet actually executes the expression
  // instead of receiving a pre-computed literal.
  withSQLConf(
    SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC",
    SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
      "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
    formats.foreach { format =>
      checkSparkAnswerAndOperator(
        s"SELECT date_format(TIMESTAMP '2024-03-15 14:30:45', '$format')")
    }
  }
}
191+
test("date_format with null") {
  // A NULL timestamp input must be handled the same way by Comet and Spark.
  // ConstantFolding is excluded so the expression over the literal is not folded away
  // by the optimizer before execution.
  withSQLConf(
    SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC",
    SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
      "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
    checkSparkAnswerAndOperator("SELECT date_format(CAST(NULL AS TIMESTAMP), 'yyyy-MM-dd')")
  }
}
200+
test("date_format unsupported format falls back to Spark") {
  createTimestampTestData.createOrReplaceTempView("tbl")

  withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
    // This combined pattern is expected to be rejected by Comet: the query must still
    // return Spark's answer and report the given fallback reason.
    val unsupportedFormat = "yyyy-MM-dd EEEE"
    checkSparkAnswerAndFallbackReason(
      s"SELECT c0, date_format(c0, '$unsupportedFormat') from tbl order by c0",
      s"Format '$unsupportedFormat' is not supported")
  }
}
211+
test("date_format with non-UTC timezone falls back to Spark") {
  createTimestampTestData.createOrReplaceTempView("tbl")

  val nonUtcTimezones =
    Seq("America/New_York", "America/Los_Angeles", "Europe/London", "Asia/Tokyo")

  nonUtcTimezones.foreach { tz =>
    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) {
      // Without allowIncompatible, a non-UTC session timezone is treated as incompatible:
      // the query must fall back to Spark and report the timezone in the reason.
      checkSparkAnswerAndFallbackReason(
        "SELECT c0, date_format(c0, 'yyyy-MM-dd HH:mm:ss') from tbl order by c0",
        s"Non-UTC timezone '$tz' may produce different results")
    }
  }
}
227+
test("date_format with non-UTC timezone works when allowIncompatible is enabled") {
  createTimestampTestData.createOrReplaceTempView("tbl")

  val nonUtcTimezones = Seq("America/New_York", "Europe/London", "Asia/Tokyo")

  nonUtcTimezones.foreach { tz =>
    withSQLConf(
      SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz,
      "spark.comet.expr.DateFormatClass.allowIncompatible" -> "true") {
      // With allowIncompatible enabled, the fallback to Spark is no longer expected.
      // NOTE(review): judging by its name, checkSparkAnswer still compares the result
      // with Spark's, so this test expects matching output for 'yyyy-MM-dd'; only the
      // operator check done by checkSparkAnswerAndOperator is dropped. Confirm against
      // CometTestBase.
      checkSparkAnswer("SELECT c0, date_format(c0, 'yyyy-MM-dd') from tbl order by c0")
    }
  }
}
243+
126244 test(" unix_date" ) {
127245 val r = new Random (42 )
128246 val schema = StructType (Seq (StructField (" c0" , DataTypes .DateType , true )))
0 commit comments