Skip to content

Commit 6532153

Browse files
MaxGekk authored and dongjoon-hyun committed
[SPARK-28015][SQL] Check stringToDate() consumes entire input for the yyyy and yyyy-[m]m formats
## What changes were proposed in this pull request?

Fix `stringToDate()` for the formats `yyyy` and `yyyy-[m]m`, which wrongly assumed that there are no additional chars after the last components `yyyy` and `[m]m`. In the PR, I propose to check that the entire input was consumed for these formats. After the fix, the input `1999 08 01` will be invalid because it matches the pattern `yyyy` but the string contains the additional chars ` 08 01`.

From Spark 1.6.3 through 2.4.3, the behavior has been the same:
```
spark-sql> SELECT CAST('1999 08 01' AS DATE);
1999-01-01
```

This PR makes it return NULL like Hive.
```
spark-sql> SELECT CAST('1999 08 01' AS DATE);
NULL
```

## How was this patch tested?

Added new checks to `DateTimeUtilsSuite` for the `1999 08 01`, `1999-08 01` and `1999 08` inputs.

Closes apache#25097 from MaxGekk/spark-28015-invalid-date-format.

Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent ec821b4 commit 6532153

File tree

3 files changed

+106
-24
lines changed

3 files changed

+106
-24
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,10 @@ object DateTimeUtils {
406406
// year should have exact four digits
407407
return None
408408
}
409+
if (i < 2 && j < bytes.length) {
410+
// For the `yyyy` and `yyyy-[m]m` formats, entire input must be consumed.
411+
return None
412+
}
409413
segments(i) = currentSegmentValue
410414
try {
411415
val localDate = LocalDate.of(segments(0), segments(1), segments(2))

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,9 @@ class DateTimeUtilsSuite extends SparkFunSuite {
138138
assert(stringToDate(UTF8String.fromString("015-03-18")).isEmpty)
139139
assert(stringToDate(UTF8String.fromString("015")).isEmpty)
140140
assert(stringToDate(UTF8String.fromString("02015")).isEmpty)
141+
assert(stringToDate(UTF8String.fromString("1999 08 01")).isEmpty)
142+
assert(stringToDate(UTF8String.fromString("1999-08 01")).isEmpty)
143+
assert(stringToDate(UTF8String.fromString("1999 08")).isEmpty)
141144
}
142145

143146
test("string to timestamp") {
@@ -242,6 +245,9 @@ class DateTimeUtilsSuite extends SparkFunSuite {
242245
checkStringToTimestamp("2015-03-18T12:03.17-20:0", None)
243246
checkStringToTimestamp("2015-03-18T12:03.17-0:70", None)
244247
checkStringToTimestamp("2015-03-18T12:03.17-1:0:0", None)
248+
checkStringToTimestamp("1999 08 01", None)
249+
checkStringToTimestamp("1999-08 01", None)
250+
checkStringToTimestamp("1999 08", None)
245251

246252
// Truncating the fractional seconds
247253
timeZone = TimeZone.getTimeZone("GMT+00:00")

sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out

Lines changed: 96 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -198,17 +198,29 @@ struct<DATE '1999-01-18':date>
198198
-- !query 21
199199
SELECT date '1999 Jan 08'
200200
-- !query 21 schema
201-
struct<DATE '1999-01-01':date>
201+
struct<>
202202
-- !query 21 output
203-
1999-01-01
203+
org.apache.spark.sql.catalyst.parser.ParseException
204+
205+
Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7)
206+
207+
== SQL ==
208+
SELECT date '1999 Jan 08'
209+
-------^^^
204210

205211

206212
-- !query 22
207213
SELECT date '1999 08 Jan'
208214
-- !query 22 schema
209-
struct<DATE '1999-01-01':date>
215+
struct<>
210216
-- !query 22 output
211-
1999-01-01
217+
org.apache.spark.sql.catalyst.parser.ParseException
218+
219+
Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7)
220+
221+
== SQL ==
222+
SELECT date '1999 08 Jan'
223+
-------^^^
212224

213225

214226
-- !query 23
@@ -230,17 +242,29 @@ struct<DATE '1999-08-01':date>
230242
-- !query 25
231243
SELECT date '1999 01 08'
232244
-- !query 25 schema
233-
struct<DATE '1999-01-01':date>
245+
struct<>
234246
-- !query 25 output
235-
1999-01-01
247+
org.apache.spark.sql.catalyst.parser.ParseException
248+
249+
Cannot parse the DATE value: 1999 01 08(line 1, pos 7)
250+
251+
== SQL ==
252+
SELECT date '1999 01 08'
253+
-------^^^
236254

237255

238256
-- !query 26
239257
SELECT date '1999 08 01'
240258
-- !query 26 schema
241-
struct<DATE '1999-01-01':date>
259+
struct<>
242260
-- !query 26 output
243-
1999-01-01
261+
org.apache.spark.sql.catalyst.parser.ParseException
262+
263+
Cannot parse the DATE value: 1999 08 01(line 1, pos 7)
264+
265+
== SQL ==
266+
SELECT date '1999 08 01'
267+
-------^^^
244268

245269

246270
-- !query 27
@@ -254,17 +278,29 @@ struct<DATE '1999-01-08':date>
254278
-- !query 28
255279
SELECT date '1999 Jan 08'
256280
-- !query 28 schema
257-
struct<DATE '1999-01-01':date>
281+
struct<>
258282
-- !query 28 output
259-
1999-01-01
283+
org.apache.spark.sql.catalyst.parser.ParseException
284+
285+
Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7)
286+
287+
== SQL ==
288+
SELECT date '1999 Jan 08'
289+
-------^^^
260290

261291

262292
-- !query 29
263293
SELECT date '1999 08 Jan'
264294
-- !query 29 schema
265-
struct<DATE '1999-01-01':date>
295+
struct<>
266296
-- !query 29 output
267-
1999-01-01
297+
org.apache.spark.sql.catalyst.parser.ParseException
298+
299+
Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7)
300+
301+
== SQL ==
302+
SELECT date '1999 08 Jan'
303+
-------^^^
268304

269305

270306
-- !query 30
@@ -286,17 +322,29 @@ struct<DATE '1999-08-01':date>
286322
-- !query 32
287323
SELECT date '1999 01 08'
288324
-- !query 32 schema
289-
struct<DATE '1999-01-01':date>
325+
struct<>
290326
-- !query 32 output
291-
1999-01-01
327+
org.apache.spark.sql.catalyst.parser.ParseException
328+
329+
Cannot parse the DATE value: 1999 01 08(line 1, pos 7)
330+
331+
== SQL ==
332+
SELECT date '1999 01 08'
333+
-------^^^
292334

293335

294336
-- !query 33
295337
SELECT date '1999 08 01'
296338
-- !query 33 schema
297-
struct<DATE '1999-01-01':date>
339+
struct<>
298340
-- !query 33 output
299-
1999-01-01
341+
org.apache.spark.sql.catalyst.parser.ParseException
342+
343+
Cannot parse the DATE value: 1999 08 01(line 1, pos 7)
344+
345+
== SQL ==
346+
SELECT date '1999 08 01'
347+
-------^^^
300348

301349

302350
-- !query 34
@@ -318,17 +366,29 @@ struct<DATE '1999-01-18':date>
318366
-- !query 36
319367
SELECT date '1999 Jan 08'
320368
-- !query 36 schema
321-
struct<DATE '1999-01-01':date>
369+
struct<>
322370
-- !query 36 output
323-
1999-01-01
371+
org.apache.spark.sql.catalyst.parser.ParseException
372+
373+
Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7)
374+
375+
== SQL ==
376+
SELECT date '1999 Jan 08'
377+
-------^^^
324378

325379

326380
-- !query 37
327381
SELECT date '1999 08 Jan'
328382
-- !query 37 schema
329-
struct<DATE '1999-01-01':date>
383+
struct<>
330384
-- !query 37 output
331-
1999-01-01
385+
org.apache.spark.sql.catalyst.parser.ParseException
386+
387+
Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7)
388+
389+
== SQL ==
390+
SELECT date '1999 08 Jan'
391+
-------^^^
332392

333393

334394
-- !query 38
@@ -350,17 +410,29 @@ struct<DATE '1999-08-01':date>
350410
-- !query 40
351411
SELECT date '1999 01 08'
352412
-- !query 40 schema
353-
struct<DATE '1999-01-01':date>
413+
struct<>
354414
-- !query 40 output
355-
1999-01-01
415+
org.apache.spark.sql.catalyst.parser.ParseException
416+
417+
Cannot parse the DATE value: 1999 01 08(line 1, pos 7)
418+
419+
== SQL ==
420+
SELECT date '1999 01 08'
421+
-------^^^
356422

357423

358424
-- !query 41
359425
SELECT date '1999 08 01'
360426
-- !query 41 schema
361-
struct<DATE '1999-01-01':date>
427+
struct<>
362428
-- !query 41 output
363-
1999-01-01
429+
org.apache.spark.sql.catalyst.parser.ParseException
430+
431+
Cannot parse the DATE value: 1999 08 01(line 1, pos 7)
432+
433+
== SQL ==
434+
SELECT date '1999 08 01'
435+
-------^^^
364436

365437

366438
-- !query 42

0 commit comments

Comments
 (0)