Skip to content

Commit 84dca9f

Browse files
authored
feat: Support more date part expressions (#2316)
1 parent f0f0653 commit 84dca9f

File tree

4 files changed

+162
-22
lines changed

4 files changed

+162
-22
lines changed

docs/source/user-guide/latest/expressions.md

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -91,19 +91,25 @@ incompatible expressions.
9191

9292
## Date/Time Functions
9393

94-
| Expression | SQL | Spark-Compatible? | Compatibility Notes |
95-
| -------------- | ---------------------------- | ----------------- | ----------------------------------------------------------------------------- |
96-
| DateAdd | `date_add` | Yes | |
97-
| DateSub | `date_sub` | Yes | |
98-
| DatePart | `date_part(field, source)` | Yes | Only `year` is supported |
99-
| Extract | `extract(field FROM source)` | Yes | Only `year` is supported |
100-
| FromUnixTime | `from_unixtime` | No | Does not support format, supports only -8334601211038 <= sec <= 8210266876799 |
101-
| Hour | `hour` | Yes | |
102-
| Minute | `minute` | Yes | |
103-
| Second | `second` | Yes | |
104-
| TruncDate | `trunc` | Yes | |
105-
| TruncTimestamp | `trunc_date` | Yes | |
106-
| Year | `year` | Yes | |
94+
| Expression | SQL | Spark-Compatible? | Compatibility Notes |
95+
| -------------- |------------------------------| ----------------- |------------------------------------------------------------------------------------------------------|
96+
| DateAdd | `date_add` | Yes | |
97+
| DateSub | `date_sub` | Yes | |
98+
| DatePart | `date_part(field, source)` | Yes | Supported values of `field`: `year`/`month`/`week`/`day`/`dayofweek`/`doy`/`quarter`/`hour`/`minute` |
99+
| Extract | `extract(field FROM source)` | Yes | Supported values of `field`: `year`/`month`/`week`/`day`/`dayofweek`/`doy`/`quarter`/`hour`/`minute` |
100+
| FromUnixTime | `from_unixtime` | No | Does not support format, supports only -8334601211038 <= sec <= 8210266876799 |
101+
| Hour | `hour` | Yes | |
102+
| Minute | `minute` | Yes | |
103+
| Second | `second` | Yes | |
104+
| TruncDate | `trunc` | Yes | |
105+
| TruncTimestamp | `trunc_date` | Yes | |
106+
| Year | `year` | Yes | |
107+
| Month | `month` | Yes | |
108+
| DayOfMonth | `day`/`dayofmonth` | Yes | |
109+
| DayOfWeek | `dayofweek` | Yes | |
110+
| DayOfYear | `dayofyear` | Yes | |
111+
| WeekOfYear | `weekofyear` | Yes | |
112+
| Quarter | `quarter` | Yes | |
107113

108114
## Math Expressions
109115

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,16 @@ object QueryPlanSerde extends Logging with CometExprShim {
203203
classOf[Second] -> CometSecond,
204204
classOf[TruncDate] -> CometTruncDate,
205205
classOf[TruncTimestamp] -> CometTruncTimestamp,
206-
classOf[Year] -> CometYear)
206+
classOf[Year] -> CometYear,
207+
classOf[Month] -> CometMonth,
208+
classOf[DayOfMonth] -> CometDayOfMonth,
209+
classOf[DayOfWeek] -> CometDayOfWeek,
210+
// FIXME: current datafusion version does not support isodow (WeekDay)
211+
// , see: https://github.com/apache/datafusion-comet/issues/2330
212+
// classOf[WeekDay] -> CometWeekDay,
213+
classOf[DayOfYear] -> CometDayOfYear,
214+
classOf[WeekOfYear] -> CometWeekOfYear,
215+
classOf[Quarter] -> CometQuarter)
207216

208217
private val conversionExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] = Map(
209218
classOf[Cast] -> CometCast)

spark/src/main/scala/org/apache/comet/serde/datetime.scala

Lines changed: 127 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,43 @@
1919

2020
package org.apache.comet.serde
2121

22-
import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub, Hour, Literal, Minute, Second, TruncDate, TruncTimestamp, Year}
22+
import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub, DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, Literal, Minute, Month, Quarter, Second, TruncDate, TruncTimestamp, WeekDay, WeekOfYear, Year}
2323
import org.apache.spark.sql.types.{DateType, IntegerType}
2424

2525
import org.apache.comet.CometSparkSessionExtensions.withInfo
26+
import org.apache.comet.serde.CometGetDateField.CometGetDateField
2627
import org.apache.comet.serde.ExprOuterClass.Expr
2728
import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType}
2829

29-
object CometYear extends CometExpressionSerde[Year] {
30-
override def convert(
31-
expr: Year,
30+
private object CometGetDateField extends Enumeration {
31+
type CometGetDateField = Value
32+
33+
// See: https://datafusion.apache.org/user-guide/sql/scalar_functions.html#date-part
34+
val Year: Value = Value("year")
35+
val Month: Value = Value("month")
36+
val DayOfMonth: Value = Value("day")
37+
// Datafusion: day of the week where Sunday is 0, but spark sunday is 1 (1 = Sunday,
38+
// 2 = Monday, ..., 7 = Saturday).
39+
val DayOfWeek: Value = Value("dow")
40+
val DayOfYear: Value = Value("doy")
41+
val WeekDay: Value = Value("isodow") // day of the week where Monday is 0
42+
val WeekOfYear: Value = Value("week")
43+
val Quarter: Value = Value("quarter")
44+
}
45+
46+
/**
47+
* Convert spark [[org.apache.spark.sql.catalyst.expressions.GetDateField]] expressions to
48+
* Datafusion
49+
* [[https://datafusion.apache.org/user-guide/sql/scalar_functions.html#date-part datepart]]
50+
* function.
51+
*/
52+
trait CometExprGetDateField[T <: GetDateField] {
53+
def getDateField(
54+
expr: T,
55+
field: CometGetDateField,
3256
inputs: Seq[Attribute],
3357
binding: Boolean): Option[ExprOuterClass.Expr] = {
34-
val periodType = exprToProtoInternal(Literal("year"), inputs, binding)
58+
val periodType = exprToProtoInternal(Literal(field.toString), inputs, binding)
3559
val childExpr = exprToProtoInternal(expr.child, inputs, binding)
3660
val optExpr = scalarFunctionExprToProto("datepart", Seq(periodType, childExpr): _*)
3761
.map(e => {
@@ -51,6 +75,104 @@ object CometYear extends CometExpressionSerde[Year] {
5175
}
5276
}
5377

78+
object CometYear extends CometExpressionSerde[Year] with CometExprGetDateField[Year] {
79+
override def convert(
80+
expr: Year,
81+
inputs: Seq[Attribute],
82+
binding: Boolean): Option[ExprOuterClass.Expr] = {
83+
getDateField(expr, CometGetDateField.Year, inputs, binding)
84+
}
85+
}
86+
87+
object CometMonth extends CometExpressionSerde[Month] with CometExprGetDateField[Month] {
88+
override def convert(
89+
expr: Month,
90+
inputs: Seq[Attribute],
91+
binding: Boolean): Option[ExprOuterClass.Expr] = {
92+
getDateField(expr, CometGetDateField.Month, inputs, binding)
93+
}
94+
}
95+
96+
object CometDayOfMonth
97+
extends CometExpressionSerde[DayOfMonth]
98+
with CometExprGetDateField[DayOfMonth] {
99+
override def convert(
100+
expr: DayOfMonth,
101+
inputs: Seq[Attribute],
102+
binding: Boolean): Option[ExprOuterClass.Expr] = {
103+
getDateField(expr, CometGetDateField.DayOfMonth, inputs, binding)
104+
}
105+
}
106+
107+
object CometDayOfWeek
108+
extends CometExpressionSerde[DayOfWeek]
109+
with CometExprGetDateField[DayOfWeek] {
110+
override def convert(
111+
expr: DayOfWeek,
112+
inputs: Seq[Attribute],
113+
binding: Boolean): Option[ExprOuterClass.Expr] = {
114+
// Datafusion: day of the week where Sunday is 0, but spark sunday is 1 (1 = Sunday,
115+
// 2 = Monday, ..., 7 = Saturday). So we need to add 1 to the result of datepart(dow, ...)
116+
val optExpr = getDateField(expr, CometGetDateField.DayOfWeek, inputs, binding)
117+
.zip(exprToProtoInternal(Literal(1), inputs, binding))
118+
.map { case (left, right) =>
119+
Expr
120+
.newBuilder()
121+
.setAdd(
122+
ExprOuterClass.MathExpr
123+
.newBuilder()
124+
.setLeft(left)
125+
.setRight(right)
126+
.setEvalMode(ExprOuterClass.EvalMode.LEGACY)
127+
.setReturnType(serializeDataType(IntegerType).get)
128+
.build())
129+
.build()
130+
}
131+
.headOption
132+
optExprWithInfo(optExpr, expr, expr.child)
133+
}
134+
}
135+
136+
object CometWeekDay extends CometExpressionSerde[WeekDay] with CometExprGetDateField[WeekDay] {
137+
override def convert(
138+
expr: WeekDay,
139+
inputs: Seq[Attribute],
140+
binding: Boolean): Option[ExprOuterClass.Expr] = {
141+
getDateField(expr, CometGetDateField.WeekDay, inputs, binding)
142+
}
143+
}
144+
145+
object CometDayOfYear
146+
extends CometExpressionSerde[DayOfYear]
147+
with CometExprGetDateField[DayOfYear] {
148+
override def convert(
149+
expr: DayOfYear,
150+
inputs: Seq[Attribute],
151+
binding: Boolean): Option[ExprOuterClass.Expr] = {
152+
getDateField(expr, CometGetDateField.DayOfYear, inputs, binding)
153+
}
154+
}
155+
156+
object CometWeekOfYear
157+
extends CometExpressionSerde[WeekOfYear]
158+
with CometExprGetDateField[WeekOfYear] {
159+
override def convert(
160+
expr: WeekOfYear,
161+
inputs: Seq[Attribute],
162+
binding: Boolean): Option[ExprOuterClass.Expr] = {
163+
getDateField(expr, CometGetDateField.WeekOfYear, inputs, binding)
164+
}
165+
}
166+
167+
object CometQuarter extends CometExpressionSerde[Quarter] with CometExprGetDateField[Quarter] {
168+
override def convert(
169+
expr: Quarter,
170+
inputs: Seq[Attribute],
171+
binding: Boolean): Option[ExprOuterClass.Expr] = {
172+
getDateField(expr, CometGetDateField.Quarter, inputs, binding)
173+
}
174+
}
175+
54176
object CometHour extends CometExpressionSerde[Hour] {
55177
override def convert(
56178
expr: Hour,

spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,14 +1715,17 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
17151715
}
17161716
}
17171717

1718-
test("Year") {
1718+
test("DatePart functions: Year/Month/DayOfMonth/DayOfWeek/DayOfYear/WeekOfYear/Quarter") {
17191719
Seq(false, true).foreach { dictionary =>
17201720
withSQLConf("parquet.enable.dictionary" -> dictionary.toString) {
17211721
val table = "test"
17221722
withTable(table) {
17231723
sql(s"create table $table(col timestamp) using parquet")
1724-
sql(s"insert into $table values (now()), (null)")
1725-
checkSparkAnswerAndOperator(s"SELECT year(col) FROM $table")
1724+
sql(s"insert into $table values (now()), (timestamp('1900-01-01')), (null)")
1725+
// TODO: weekday(col) https://github.com/apache/datafusion-comet/issues/2330
1726+
checkSparkAnswerAndOperator(
1727+
"SELECT col, year(col), month(col), day(col)," +
1728+
s" dayofweek(col), dayofyear(col), weekofyear(col), quarter(col) FROM $table")
17261729
}
17271730
}
17281731
}

0 commit comments

Comments
 (0)