Skip to content

Commit d26642d

Browse files
[SPARK-28107][SQL] Support 'DAY TO (HOUR|MINUTE|SECOND)', 'HOUR TO (MINUTE|SECOND)' and 'MINUTE TO SECOND'
## What changes were proposed in this pull request? The interval conversion behavior is the same as PostgreSQL's. https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/interval.sql#L180-L203 ## How was this patch tested? Unit tests (UT). Closes apache#25000 from lipzhu/SPARK-28107. Lead-authored-by: Zhu, Lipeng <[email protected]> Co-authored-by: Dongjoon Hyun <[email protected]> Co-authored-by: Lipeng Zhu <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 3a94fb3 commit d26642d

File tree

4 files changed

+87
-9
lines changed

4 files changed

+87
-9
lines changed

common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ private static String unitRegex(String unit) {
5454
private static Pattern yearMonthPattern =
5555
Pattern.compile("^(?:['|\"])?([+|-])?(\\d+)-(\\d+)(?:['|\"])?$");
5656

57-
private static Pattern dayTimePattern =
58-
Pattern.compile("^(?:['|\"])?([+|-])?((\\d+) )?(\\d+):(\\d+):(\\d+)(\\.(\\d+))?(?:['|\"])?$");
57+
private static Pattern dayTimePattern = Pattern.compile(
58+
"^(?:['|\"])?([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?(?:['|\"])?$");
5959

6060
private static Pattern quoteTrimPattern = Pattern.compile("^(?:['|\"])?(.*?)(?:['|\"])?$");
6161

@@ -160,6 +160,20 @@ public static CalendarInterval fromYearMonthString(String s) throws IllegalArgum
160160
* adapted from HiveIntervalDayTime.valueOf
161161
*/
162162
public static CalendarInterval fromDayTimeString(String s) throws IllegalArgumentException {
163+
return fromDayTimeString(s, "day", "second");
164+
}
165+
166+
/**
167+
* Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn
168+
*
169+
* adapted from HiveIntervalDayTime.valueOf.
170+
* Below interval conversion patterns are supported:
171+
* - DAY TO (HOUR|MINUTE|SECOND)
172+
* - HOUR TO (MINUTE|SECOND)
173+
* - MINUTE TO SECOND
174+
*/
175+
public static CalendarInterval fromDayTimeString(String s, String from, String to)
176+
throws IllegalArgumentException {
163177
CalendarInterval result = null;
164178
if (s == null) {
165179
throw new IllegalArgumentException("Interval day-time string was null");
@@ -174,12 +188,40 @@ public static CalendarInterval fromDayTimeString(String s) throws IllegalArgumen
174188
int sign = m.group(1) != null && m.group(1).equals("-") ? -1 : 1;
175189
long days = m.group(2) == null ? 0 : toLongWithRange("day", m.group(3),
176190
0, Integer.MAX_VALUE);
177-
long hours = toLongWithRange("hour", m.group(4), 0, 23);
178-
long minutes = toLongWithRange("minute", m.group(5), 0, 59);
179-
long seconds = toLongWithRange("second", m.group(6), 0, 59);
191+
long hours = 0;
192+
long minutes;
193+
long seconds = 0;
194+
if (m.group(5) != null || from.equals("minute")) { // 'HH:mm:ss' or 'mm:ss minute'
195+
hours = toLongWithRange("hour", m.group(5), 0, 23);
196+
minutes = toLongWithRange("minute", m.group(6), 0, 59);
197+
seconds = toLongWithRange("second", m.group(7), 0, 59);
198+
} else if (m.group(8) != null){ // 'mm:ss.nn'
199+
minutes = toLongWithRange("minute", m.group(6), 0, 59);
200+
seconds = toLongWithRange("second", m.group(7), 0, 59);
201+
} else { // 'HH:mm'
202+
hours = toLongWithRange("hour", m.group(6), 0, 23);
203+
minutes = toLongWithRange("second", m.group(7), 0, 59);
204+
}
180205
// Hive allow nanosecond precision interval
181-
String nanoStr = m.group(8) == null ? null : (m.group(8) + "000000000").substring(0, 9);
206+
String nanoStr = m.group(9) == null ? null : (m.group(9) + "000000000").substring(0, 9);
182207
long nanos = toLongWithRange("nanosecond", nanoStr, 0L, 999999999L);
208+
switch (to) {
209+
case "hour":
210+
minutes = 0;
211+
seconds = 0;
212+
nanos = 0;
213+
break;
214+
case "minute":
215+
seconds = 0;
216+
nanos = 0;
217+
break;
218+
case "second":
219+
// No-op
220+
break;
221+
default:
222+
throw new IllegalArgumentException(
223+
String.format("Cannot support (interval '%s' %s to %s) expression", s, from, to));
224+
}
183225
result = new CalendarInterval(0, sign * (
184226
days * MICROS_PER_DAY + hours * MICROS_PER_HOUR + minutes * MICROS_PER_MINUTE +
185227
seconds * MICROS_PER_SECOND + nanos / 1000L));

common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,14 @@ public void fromDayTimeStringTest() {
185185
} catch (IllegalArgumentException e) {
186186
assertTrue(e.getMessage().contains("not match day-time format"));
187187
}
188+
189+
try {
190+
input = "5 1:12:20";
191+
fromDayTimeString(input, "hour", "microsecond");
192+
fail("Expected to throw an exception for the invalid convention type");
193+
} catch (IllegalArgumentException e) {
194+
assertTrue(e.getMessage().contains("Cannot support (interval"));
195+
}
188196
}
189197

190198
@Test

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1851,7 +1851,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
18511851
* Create a [[CalendarInterval]] for a unit value pair. Two unit configuration types are
18521852
* supported:
18531853
* - Single unit.
1854-
* - From-To unit (only 'YEAR TO MONTH' and 'DAY TO SECOND' and 'HOUR to SECOND' are supported).
1854+
* - From-To unit ('YEAR TO MONTH', 'DAY TO HOUR', 'DAY TO MINUTE', 'DAY TO SECOND',
1855+
* 'HOUR TO MINUTE', 'HOUR TO SECOND' and 'MINUTE TO SECOND' are supported).
18551856
*/
18561857
override def visitIntervalField(ctx: IntervalFieldContext): CalendarInterval = withOrigin(ctx) {
18571858
import ctx._
@@ -1866,10 +1867,18 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
18661867
CalendarInterval.fromSingleUnitString(u, s)
18671868
case ("year", Some("month")) =>
18681869
CalendarInterval.fromYearMonthString(s)
1870+
case ("day", Some("hour")) =>
1871+
CalendarInterval.fromDayTimeString(s, "day", "hour")
1872+
case ("day", Some("minute")) =>
1873+
CalendarInterval.fromDayTimeString(s, "day", "minute")
18691874
case ("day", Some("second")) =>
1870-
CalendarInterval.fromDayTimeString(s)
1875+
CalendarInterval.fromDayTimeString(s, "day", "second")
1876+
case ("hour", Some("minute")) =>
1877+
CalendarInterval.fromDayTimeString(s, "hour", "minute")
18711878
case ("hour", Some("second")) =>
1872-
CalendarInterval.fromDayTimeString(s)
1879+
CalendarInterval.fromDayTimeString(s, "hour", "second")
1880+
case ("minute", Some("second")) =>
1881+
CalendarInterval.fromDayTimeString(s, "minute", "second")
18731882
case (from, Some(t)) =>
18741883
throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx)
18751884
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,12 +1183,31 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
11831183
Row(CalendarInterval.fromString("interval 100 milliseconds")))
11841184
checkAnswer(sql("select interval '10-9' year to month"),
11851185
Row(CalendarInterval.fromString("interval 10 years 9 months")))
1186+
checkAnswer(sql("select interval '20 15:40:32.99899999' day to hour"),
1187+
Row(CalendarInterval.fromString("interval 2 weeks 6 days 15 hours")))
1188+
checkAnswer(sql("select interval '20 15:40:32.99899999' day to minute"),
1189+
Row(CalendarInterval.fromString("interval 2 weeks 6 days 15 hours 40 minutes")))
11861190
checkAnswer(sql("select interval '20 15:40:32.99899999' day to second"),
11871191
Row(CalendarInterval.fromString("interval 2 weeks 6 days 15 hours 40 minutes " +
11881192
"32 seconds 998 milliseconds 999 microseconds")))
1193+
checkAnswer(sql("select interval '15:40:32.99899999' hour to minute"),
1194+
Row(CalendarInterval.fromString("interval 15 hours 40 minutes")))
1195+
checkAnswer(sql("select interval '15:40.99899999' hour to second"),
1196+
Row(CalendarInterval.fromString("interval 15 minutes 40 seconds 998 milliseconds " +
1197+
"999 microseconds")))
1198+
checkAnswer(sql("select interval '15:40' hour to second"),
1199+
Row(CalendarInterval.fromString("interval 15 hours 40 minutes")))
11891200
checkAnswer(sql("select interval '15:40:32.99899999' hour to second"),
11901201
Row(CalendarInterval.fromString("interval 15 hours 40 minutes 32 seconds 998 milliseconds " +
11911202
"999 microseconds")))
1203+
checkAnswer(sql("select interval '20 40:32.99899999' minute to second"),
1204+
Row(CalendarInterval.fromString("interval 2 weeks 6 days 40 minutes 32 seconds " +
1205+
"998 milliseconds 999 microseconds")))
1206+
checkAnswer(sql("select interval '40:32.99899999' minute to second"),
1207+
Row(CalendarInterval.fromString("interval 40 minutes 32 seconds 998 milliseconds " +
1208+
"999 microseconds")))
1209+
checkAnswer(sql("select interval '40:32' minute to second"),
1210+
Row(CalendarInterval.fromString("interval 40 minutes 32 seconds")))
11921211
checkAnswer(sql("select interval '30' year"),
11931212
Row(CalendarInterval.fromString("interval 30 years")))
11941213
checkAnswer(sql("select interval '25' month"),

0 commit comments

Comments
 (0)