diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5aab630..14f5205 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -17,8 +17,7 @@ jobs: fail-fast: false matrix: version: - - '1.6' - - '1.8' + - '1.10' - '1' - 'nightly' os: @@ -33,7 +32,7 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v2 + - uses: actions/cache@v4 env: cache-name: cache-artifacts with: diff --git a/Project.toml b/Project.toml index b0c2ba3..102f1b1 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c" TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53" [compat] +BenchmarkTools = "1.6" ChunkedBase = "0.3" Dates = "1" FixedPointDecimals = "0.4.3, 0.5, 0.6" @@ -20,10 +21,11 @@ Parsers = "2.7" SentinelArrays = "1" SnoopPrecompile = "1" TimeZones = "1" -julia = "1.6" +julia = "1.10" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CodecZlibNG = "642d12eb-acb5-4437-bcfc-a25e07ad685c" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -31,4 +33,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [targets] -test = ["Aqua", "CodecZlibNG", "JET", "Logging", "Test", "UUIDs"] +test = ["Aqua", "BenchmarkTools", "CodecZlibNG", "JET", "Logging", "Test", "UUIDs"] diff --git a/src/type_parsers/datetime_parser.jl b/src/type_parsers/datetime_parser.jl index 8a3f46d..8496dff 100644 --- a/src/type_parsers/datetime_parser.jl +++ b/src/type_parsers/datetime_parser.jl @@ -16,6 +16,18 @@ It will parse the following formats: - `yyyy-mm-ddTHH:MM:SSZ` - `yyyy-mm-ddTHH:MM:SS.sZ` +Negative years are also supported. The smallest DateTime value that can be represented is +`-292277024-05-15T16:47:04.192` and the largest is `292277025-08-17T07:12:55.807`, since +they are backed by a 64 bit integer with millisecond precision. 
These values correspond to +`DateTime(Dates.UTM(typemin(Int)))` and `DateTime(Dates.UTM(typemax(Int)))` respectively. + +Additionally, since some systems use 32 bit integers to represent years and we don't want to +fail loudly parsing these even though we can't represent them exactly, all valid +timestamps within the range `[-2147483648-01-01T00:00:00.000, -292277024-05-15T16:47:04.191]` +will be clamped to the minimal representable DateTime, `-292277024-05-15T16:47:04.192`, and all valid +timestamps within the range `[292277025-08-17T07:12:55.808, 2147483647-12-31T23:59:59.999]` +will be clamped to the maximal representable DateTime, `292277025-08-17T07:12:55.807`. + # Examples: ```julia julia> Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01") @@ -49,108 +61,155 @@ function _unsafe_datetime(y=0, m=1, d=1, h=0, mi=0, s=0, ms=0) rata = ms + 1000 * (s + 60mi + 3600h + 86400 * Dates.totaldays(y, m, d)) return DateTime(Dates.UTM(rata)) end +function _clamped_datetime(y, m, d, h=0, mi=0, s=0, ms=0) + dt = _unsafe_datetime(y, m, d, h, mi, s, ms) # Int64 arithmetic; wraps silently for extreme years + (y > 292277025 || (y == 292277025 && dt < ZERO_DATETIME)) && return MAX_DATETIME # past the boundary year the wrapped value can have either sign, so clamp by year; the sign test is only reliable within the boundary year + (y < -292277024 || (y == -292277024 && dt > ZERO_DATETIME)) && return MIN_DATETIME # mirror image of the check above for the minimal DateTime + return dt +end + +function _clamped_datetime_from_zoned(year::Int, zdt::ZonedDateTime) + dt = DateTime(zdt, TimeZones.UTC) + year >= 292277025 && dt < ZERO_DATETIME && return MAX_DATETIME + year <= -292277024 && dt > ZERO_DATETIME && return MIN_DATETIME + return dt +end + +const MAX_DATETIME = DateTime(Dates.UTM(typemax(Int))) +const MIN_DATETIME = DateTime(Dates.UTM(typemin(Int))) +const ZERO_DATETIME = DateTime(Dates.UTM(0)) -# [y]yyy-[m]m-[d]d((T|\s)HH:MM:SS(\.s{1,3}})?)?(zzzz|ZZZ|\Z)? +# [-]y{1,10}-mm-dd((T|\s)HH:MM:SS(\.s{1,3})?)?(zzzz|ZZZ|\Z)?
Base.@propagate_inbounds function _default_tryparse_timestamp(buf, pos, len, code, b, options) delim = options.delim.token cq = options.cq.token rounding = options.rounding - # ensure there is enough room for at least yyyy-m-d - if len - pos < 8 + # ensure there is enough room for at least y-mm-dd + if len - pos + 1 < 7 (b != delim) && (code |= Parsers.INVALID) (pos >= len) && (code |= Parsers.EOF) return _unsafe_datetime(0), code, pos end + sign_mul = 1 + if b == UInt8('-') + sign_mul = -1 + pos += 1 + b = buf[pos] + end year = 0 - for i in 1:4 - b -= 0x30 - b > 0x09 && (return _unsafe_datetime(0), code | Parsers.INVALID, pos) - year = Int(b) + 10 * year - b = buf[pos += 1] - (i > 2 && b == UInt8('-')) && break + for i in 1:10 # 10 digits max, since that is the maximum length of a 32 bit integer, anything larger is invalid + b0 = b - 0x30 + b0 > 0x09 && (return _unsafe_datetime(0), code | Parsers.INVALID, pos) + year = Int(b0) + 10 * year + pos += 1 + pos > len && (return _unsafe_datetime(0), code | Parsers.INVALID | Parsers.EOF, pos) + b = buf[pos] + b == UInt8('-') && break end - b != UInt8('-') && (return _unsafe_datetime(year), code | Parsers.INVALID, pos) - b = buf[pos += 1] + year *= sign_mul + # If the year is larger than what can be represented by a 32 bit integer, fail to parse, + # values between typemin(Int32) and MIN_DATETIME are clamped to MIN_DATETIME + # values between typemax(Int32) and MAX_DATETIME are clamped to MAX_DATETIME + overflowed = (year > typemax(Int32) || year < typemin(Int32)) + if b != UInt8('-') || overflowed + overflowed && (code |= Parsers.OVERFLOW) # flag OVERFLOW only when the year is really outside the Int32 range; a missing '-' is plain INVALID + return (_unsafe_datetime(year), code | Parsers.INVALID, pos) + end + pos += 1 + pos > len && (return _unsafe_datetime(0), code | Parsers.INVALID | Parsers.EOF, pos) + b = buf[pos] month = 0 for _ in 1:2 - b -= 0x30 - b > 0x09 && (return _unsafe_datetime(year), code | Parsers.INVALID, pos) - month = Int(b) + 10 * month - b = buf[pos += 1] - b == UInt8('-') && break + b0 = b - 0x30 + b0
> 0x09 && (return _unsafe_datetime(year), code | Parsers.INVALID, pos) + month = Int(b0) + 10 * month + pos += 1 + pos > len && (return _unsafe_datetime(year), code | Parsers.INVALID | Parsers.EOF, pos) + b = buf[pos] end - month > 12 && (return _unsafe_datetime(year), code | Parsers.INVALID, pos) + (month < 1 || month > 12) && (return _unsafe_datetime(year), code | Parsers.INVALID, pos) # month 00 must be rejected here, otherwise it reaches Dates.daysinmonth(year, 0) below - b != UInt8('-') && (return _unsafe_datetime(year, month), code | Parsers.INVALID, pos) - b = buf[pos += 1] + b != UInt8('-') && (return _unsafe_datetime(year, month), code | Parsers.INVALID, pos) + pos += 1 + pos > len && (return _unsafe_datetime(year, month), code | Parsers.INVALID | Parsers.EOF, pos) + b = buf[pos] day = 0 - for _ in 1:2 - b -= 0x30 - b > 0x09 && (return _unsafe_datetime(year, month), code | Parsers.INVALID, pos) - day = Int(b) + 10 * day - pos == len && (code |= Parsers.EOF; break) - b = buf[pos += 1] - (b == UInt8('T') || b == UInt8(' ')) && break + for i in 1:2 + b0 = b - 0x30 + b0 > 0x09 && (return _unsafe_datetime(year, month), code | Parsers.INVALID, pos) + day = Int(b0) + 10 * day + pos += 1 + if pos > len + code |= Parsers.EOF; + if i == 2 + break # 2 digit day at the very end of the buffer + else # 1 digit day is an error + return (_unsafe_datetime(year, month, day), code | Parsers.INVALID, pos) + end + else + b = buf[pos] + end end - day > Dates.daysinmonth(year, month) && (return _unsafe_datetime(year, month), code | Parsers.INVALID, pos) + (day < 1 || day > Dates.daysinmonth(year, month)) && (return _unsafe_datetime(year, month), code | Parsers.INVALID, pos) # day 00 is not a valid day of month - if (pos >= len || (b != UInt8('T') && b != UInt8(' '))) - if !(b == delim || b == cq) - code |= Parsers.EOF - pos += 1 - end - return _unsafe_datetime(year, month, day), code | Parsers.OK, pos + if (pos > len) || (b != UInt8('T') && b != UInt8(' ')) + return _clamped_datetime(year, month, day), code | Parsers.OK, pos end # ensure there is enough room for at least HH:MM:DD - len - pos < 8 && (return _unsafe_datetime(0), code | Parsers.INVALID, len) + len - pos < 8 && (return _unsafe_datetime(year, month, day), code | Parsers.INVALID, pos) # pos is on the 'T'/' ' separator, so 8 more bytes are needed for HH:MM:SS b = buf[pos += 1] hour = 0 for _ in 1:2 - b -= 0x30 - b > 0x09 && 
(return _unsafe_datetime(year, month, day), code | Parsers.INVALID, pos) - hour = Int(b) + 10 * hour + b0 = b - 0x30 + b0 > 0x09 && (return _unsafe_datetime(year, month, day), code | Parsers.INVALID, pos) + hour = Int(b0) + 10 * hour b = buf[pos += 1] end - hour > 24 && (return _unsafe_datetime(year, month, day), code | Parsers.INVALID, pos) + hour >= 24 && (return _unsafe_datetime(year, month, day), code | Parsers.INVALID, pos) b != UInt8(':') && (return _unsafe_datetime(year, month, day, hour), code | Parsers.INVALID, pos) b = buf[pos += 1] minute = 0 for _ in 1:2 - b -= 0x30 - b > 0x09 && (return _unsafe_datetime(year, month, day, hour), code | Parsers.INVALID, pos) - minute = Int(b) + 10 * minute + b0 = b - 0x30 + b0 > 0x09 && (return _unsafe_datetime(year, month, day, hour), code | Parsers.INVALID, pos) + minute = Int(b0) + 10 * minute b = buf[pos += 1] end - minute > 60 && (return _unsafe_datetime(year, month, day, hour), code | Parsers.INVALID, pos) + minute >= 60 && (return _unsafe_datetime(year, month, day, hour), code | Parsers.INVALID, pos) b != UInt8(':') && (return _unsafe_datetime(year, month, day, hour, minute), code | Parsers.INVALID, pos) b = buf[pos += 1] second = 0 for _ in 1:2 - b -= 0x30 - b > 0x09 && (return _unsafe_datetime(year, month, day, hour, minute), code | Parsers.INVALID, pos) - second = Int(b) + 10 * second - pos == len && break - b = buf[pos += 1] + b0 = b - 0x30 + b0 > 0x09 && (return _unsafe_datetime(year, month, day, hour, minute), code | Parsers.INVALID, pos) + second = Int(b0) + 10 * second + pos += 1 + pos > len && (code |= Parsers.EOF; break) + b = buf[pos] end - if (pos == len || b == delim || b == cq) - code |= isnothing(Dates.validargs(DateTime, year, month, day, hour, minute, second, 0)) ? 
Parsers.OK : Parsers.INVALID - if !(b == delim || b == cq) - code |= Parsers.EOF - pos += 1 - end - return _unsafe_datetime(year, month, day, hour, minute, second), code, pos + second >= 60 && (return _unsafe_datetime(year, month, day, hour, minute), code | Parsers.INVALID, pos) + if pos > len + return _clamped_datetime(year, month, day, hour, minute, second), code | Parsers.OK, pos end millisecond = 0 if b == UInt8('.') i = 0 - while pos < len && ((b = (buf[pos += 1] - UInt8('0'))) <= 0x09) - millisecond = Int(b) + 10 * millisecond + pos += 1 + pos > len && (return _unsafe_datetime(year, month, day, hour, minute, second), code | Parsers.INVALID | Parsers.EOF, pos) + b = buf[pos] + while true + b0 = b - UInt8('0') + b0 > 0x09 && break i += 1 + millisecond = Int(b0) + 10 * millisecond + pos += 1 + pos > len && break + b = buf[pos] end i == 0 && (return _unsafe_datetime(year, month, day, hour, minute, second), code | Parsers.INVALID, pos) @@ -181,34 +240,26 @@ Base.@propagate_inbounds function _default_tryparse_timestamp(buf, pos, len, cod throw(ArgumentError("invalid rounding mode: $rounding")) end end - - b += UInt8('0') - if (pos == len || b == delim || b == cq) - code |= isnothing(Dates.validargs(DateTime, year, month, day, hour, minute, second, millisecond)) ? 
Parsers.OK : Parsers.INVALID - if !(b == delim || b == cq) - code |= Parsers.EOF - pos += 1 - end - return _unsafe_datetime(year, month, day, hour, minute, second, millisecond), code, pos - end + millisecond >= 1000 && (return _unsafe_datetime(year, month, day, hour, minute, second), code | Parsers.INVALID, pos) end b == UInt8(' ') && pos < len && (b = buf[pos += 1]) + tz, pos, code = _tryparse_timezone(buf, pos, b, len, code) - pos >= len && (code |= Parsers.EOF) - Parsers.invalid(code) && (return _unsafe_datetime(year, month, day, hour, minute, second, millisecond), code , pos) - if isnothing(Dates.validargs(ZonedDateTime, year, month, day, hour, minute, second, millisecond, tz)) - code |= Parsers.OK + pos > len && (code |= Parsers.EOF) + + dt = _clamped_datetime(year, month, day, hour, minute, second, millisecond) + code |= Parsers.OK + if isnothing(tz) + return (dt, code, pos) + else if tz === _Z # Avoiding TimeZones.ZonedDateTime to save some allocations in case the `tz` # corresponds to a UTC time zone. - return (_unsafe_datetime(year, month, day, hour, minute, second, millisecond), code, pos) + return (dt, code, pos) else - dt = _unsafe_datetime(year, month, day, hour, minute, second, millisecond) - ztd = TimeZones.ZonedDateTime(dt, TimeZones.TimeZone(tz)) - return (DateTime(ztd, TimeZones.UTC), code, pos) + zdt = TimeZones.ZonedDateTime(dt, TimeZones.TimeZone(tz)) + return (_clamped_datetime_from_zoned(year, zdt), code, pos) end - else - return (_unsafe_datetime(0), code | Parsers.INVALID, pos) end end @@ -217,6 +268,12 @@ end # This is needed until https://github.com/JuliaTime/TimeZones.jl/issues/271 is fixed const _Z = SubString("Z", 1:1) @inline function _tryparse_timezone(buf, pos, b, len, code) + # At this point we don't even know if there is a timezone to parse, we might be at the end of + # the field. So in case we get an invalid TZ here, we just return the _original_ code + # and `nothing` for the timezone, as if we never attempted to parse it. 
+ # If this _was_ a true invalid timezone, the other layers in Parsers.jl will mark the value + # as invalid because we're at the very end of the field and if we leave any non-whitespace characters + # between the end of the value and the delimiter. nb = len - pos @inbounds if b == UInt8('+') || b == UInt8('-') if nb > 1 && buf[pos+1] == UInt8('0') @@ -232,8 +289,8 @@ const _Z = SubString("Z", 1:1) end end end - (tz, pos, _, code) = Parsers.tryparsenext(Dates.DatePart{'z'}(4, false), buf, pos, len, b, code) - return tz, pos, code + (tz, pos, _, code_tz) = Parsers.tryparsenext(Dates.DatePart{'z'}(4, false), buf, pos, len, b, code) + return tz, pos, Parsers.invalid(code_tz) ? code : code_tz end @inbounds if b == UInt8('G') @@ -247,8 +304,8 @@ const _Z = SubString("Z", 1:1) return (_Z, pos+3, code) # UTC end end - (tz, pos, _, code) = Parsers.tryparsenext(Dates.DatePart{'Z'}(3, false), buf, pos, len, b, code) - return tz, pos, code + (tz, pos, _, code_tz) = Parsers.tryparsenext(Dates.DatePart{'Z'}(3, false), buf, pos, len, b, code) + return tz, pos, Parsers.invalid(code_tz) ? 
code : code_tz end function Parsers.typeparser(::Parsers.AbstractConf{GuessDateTime}, source::AbstractVector{UInt8}, pos, len, b, code, pl, options) diff --git a/test/guess_datetime.jl b/test/guess_datetime.jl index fa3ab67..0170b99 100644 --- a/test/guess_datetime.jl +++ b/test/guess_datetime.jl @@ -2,78 +2,345 @@ using Dates using ChunkedCSV using Parsers using Test +using BenchmarkTools const DT = b"1969-07-20 20:17:00" -macro test_noalloc(e) :(@test(@allocated($(esc(e))) == 0)) end - -@testset "Parsing UTC equivalent timezones does not allocate" begin - for tz in (b"-00", b"+00", b"-0000", b"+0000", b"-00:00", b"+00:00", b"UTC", b"GMT") - dt = vcat(DT, tz) - @testset "$tz" begin - res = Parsers.xparse(ChunkedCSV.GuessDateTime, dt, 1, length(dt), Parsers.OPTIONS, DateTime) - @test res.val == DateTime(1969, 7, 20, 20, 17) - @test Parsers.ok(res.code) - @test_noalloc Parsers.xparse(ChunkedCSV.GuessDateTime, dt, 1, length(dt), Parsers.OPTIONS, DateTime) +@testset "GuessDateTime" begin + @testset "Parsing UTC equivalent timezones does not allocate" begin + for tz in (b"-00", b"+00", b"-0000", b"+0000", b"-00:00", b"+00:00", b"UTC", b"GMT") + dt = vcat(DT, tz) + @testset "$tz" begin + res = Parsers.xparse(ChunkedCSV.GuessDateTime, dt, 1, length(dt), Parsers.OPTIONS, DateTime) + @test res.val == DateTime(1969, 7, 20, 20, 17) + @test Parsers.ok(res.code) + @test (@ballocated Parsers.xparse(ChunkedCSV.GuessDateTime, $dt, 1, length($dt), Parsers.OPTIONS, DateTime)) == 0 # parenthesised so `== 0` is asserted by @test instead of being benchmarked + end end end -end -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01") -@test res.val == DateTime(2014, 1, 1) -@test Parsers.ok(res.code) + @testset "Datetimes with only the Date part" begin + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "0-01-01") + @test res.val == DateTime(0, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-0-01-01") + @test res.val == DateTime(0, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "1-01-01") 
+ @test res.val == DateTime(1, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-1-01-01") + @test res.val == DateTime(-1, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "10-01-01") + @test res.val == DateTime(10, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-10-01-01") + @test res.val == DateTime(-10, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-001-01-01") + @test res.val == DateTime(-1, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "001-01-01") + @test res.val == DateTime(1, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-201-01-01") + @test res.val == DateTime(-201, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "201-01-01") + @test res.val == DateTime(201, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-2014-01-01") + @test res.val == DateTime(-2014, 1, 1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01") + @test res.val == DateTime(2014, 1, 1) + @test Parsers.ok(res.code) + end + + @testset "typemin and typemax" begin + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MIN_DATETIME)) + @test res.val == ChunkedCSV.MIN_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MAX_DATETIME)) + @test res.val == ChunkedCSV.MAX_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MIN_DATETIME, "Z")) + @test res.val == ChunkedCSV.MIN_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MAX_DATETIME, "Z")) + @test res.val == ChunkedCSV.MAX_DATETIME + @test Parsers.ok(res.code) + end + + + @testset "clamping" begin + res = 
Parsers.xparse(ChunkedCSV.GuessDateTime, string(typemax(Int32), "-12-31")) + @test res.val == ChunkedCSV.MAX_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(typemin(Int32), "-01-01")) + @test res.val == ChunkedCSV.MIN_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(typemax(Int32), "-12-31 23:59:59")) + @test res.val == ChunkedCSV.MAX_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(typemin(Int32), "-01-01 00:00:00")) + @test res.val == ChunkedCSV.MIN_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(typemax(Int32), "-12-31 23:59:59.999")) + @test res.val == ChunkedCSV.MAX_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(typemin(Int32), "-01-01 00:00:00.000")) + @test res.val == ChunkedCSV.MIN_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(typemax(Int32), "-12-31 23:59:59.999-0100")) + @test res.val == ChunkedCSV.MAX_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(typemin(Int32), "-01-01 00:00:00.000+0100")) + @test res.val == ChunkedCSV.MIN_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2147483647-12-31T23:59:59.999") + @test res.val == ChunkedCSV.MAX_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "292277025-08-17T07:12:55.808") + @test res.val == ChunkedCSV.MAX_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-2147483648-01-01T00:00:00.000") + @test res.val == ChunkedCSV.MIN_DATETIME + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-292277024-05-15T16:47:04.191") + @test res.val == ChunkedCSV.MIN_DATETIME + @test Parsers.ok(res.code) + end + + @testset "overflow" begin + res = 
Parsers.xparse(ChunkedCSV.GuessDateTime, string(Int(typemax(Int32))+1, "-12-31")) + @test Parsers.invalid(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(Int(typemin(Int32))-1, "-01-01")) + @test Parsers.invalid(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(Int(typemax(Int32))+1, "-12-31 23:59:59")) + @test Parsers.invalid(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(Int(typemin(Int32))-1, "-01-01 00:00:00")) + @test Parsers.invalid(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.7") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 700) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(Int(typemax(Int32))+1, "-12-31 23:59:59.999")) + @test Parsers.invalid(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.7") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 700) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(Int(typemin(Int32))-1, "-01-01 00:00:00.000")) + @test Parsers.invalid(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.78") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 780) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2147483648-01-01T00:00:00.000") + @test Parsers.invalid(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.78") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 780) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "-2147483649-12-31T23:59:59.999") + @test Parsers.invalid(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, 
"2014-01-01 12:34:56.789") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) -@test Parsers.ok(res.code) + end + + @testset "clamping due to timezone offset application" begin + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MIN_DATETIME, "+0100")) + @test res.val == ChunkedCSV.MIN_DATETIME # clamped + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MIN_DATETIME + Hour(1), "+0100")) + @test res.val == ChunkedCSV.MIN_DATETIME # exact + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MIN_DATETIME, "-0100")) + @test res.val == ChunkedCSV.MIN_DATETIME + Hour(1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MAX_DATETIME, "+0100")) + @test res.val == ChunkedCSV.MAX_DATETIME - Hour(1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MAX_DATETIME, "-0100")) + @test res.val == ChunkedCSV.MAX_DATETIME # clamped + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.MAX_DATETIME - Hour(1), "-0100")) + @test res.val == ChunkedCSV.MAX_DATETIME # exact + @test Parsers.ok(res.code) + + # crossing zero due to timezone offset + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.ZERO_DATETIME, "-0100")) + @test res.val == ChunkedCSV.ZERO_DATETIME + Hour(1) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, string(ChunkedCSV.ZERO_DATETIME, "+0100")) + @test res.val == ChunkedCSV.ZERO_DATETIME - Hour(1) + @test Parsers.ok(res.code) + end + + @testset "basic" begin + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56) + @test Parsers.ok(res.code) + + res = 
Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.7") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 700) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.7") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 700) + @test Parsers.ok(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.789") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.78") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 780) + @test Parsers.ok(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.7890") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.78") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 780) + @test Parsers.ok(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.7890") -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.789") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.78901", rounding=RoundNearest) -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.789") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.78901", rounding=RoundNearest) -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) -@test Parsers.ok(res.code) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.789Z") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) + + res = 
Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.789 Z") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.7890") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.7890") + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) + end -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.78901Z", rounding=RoundNearest) -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) -@test Parsers.ok(res.code) + @testset "rounding" begin + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.78901", rounding=RoundNearest) + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) -res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.78901Z", rounding=RoundNearest) -@test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.78901", rounding=RoundNearest) + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01 12:34:56.78901Z", rounding=RoundNearest) + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + @test Parsers.ok(res.code) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "2014-01-01T12:34:56.78901Z", rounding=RoundNearest) + @test res.val == DateTime(2014, 1, 1, 12, 34, 56, 789) + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "$(ChunkedCSV.MAX_DATETIME)01Z", rounding=RoundNearest) + @test res.val == ChunkedCSV.MAX_DATETIME + + res = Parsers.xparse(ChunkedCSV.GuessDateTime, "$(ChunkedCSV.MIN_DATETIME)01Z", rounding=RoundNearest) + @test res.val == ChunkedCSV.MIN_DATETIME + end + + @testset "invalid" begin + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, 
"-01-01").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "0--01").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "0-01-").code) + + @test Parsers.ok( Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "--12-31").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-13-31").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-32").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-1-31").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-11-1").code) + + @test Parsers.ok( Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31 23:59:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31 23:59:59.").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "--12-31 23:59:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-13-31 23:59:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-32 23:59:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31 24:59:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31 23:60:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-13-31 23:59:60.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-13-31 23:59:60.1000").code) + + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000 12-31 23:59:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12 31 23:59:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31x23-59:59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31 23 59:59.999").code) + @test 
Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31 23:59 59.999").code) + @test Parsers.invalid(Parsers.xparse(ChunkedCSV.GuessDateTime, "2000-12-31 23:59:59 999").code) + + dt = "2000-12-31 23:59:59.999 GMT" + for i in 1:length(dt) + s = dt[1:i] * "N" + code = Parsers.xparse(ChunkedCSV.GuessDateTime, s).code + @test Parsers.invalid(code) + @test Parsers.eof(code) + code = Parsers.xparse(ChunkedCSV.GuessDateTime, s * ",").code + @test Parsers.invalid(code) + @test Parsers.delimited(code) + code = Parsers.xparse(ChunkedCSV.GuessDateTime, s * "\n").code + @test Parsers.invalid(code) + @test Parsers.newline(code) + @test Parsers.eof(code) + code = Parsers.xparse(ChunkedCSV.GuessDateTime, s * "\n,").code + @test Parsers.invalid(code) + @test Parsers.newline(code) + end + end + + @testset "parsing in context" begin + dt_data = [ + (DateTime(1234, 12, 31), "1234-12-31"), + (DateTime(1234, 12, 31, 12, 34, 56), "1234-12-31 12:34:56"), + (DateTime(1234, 12, 31, 12, 34, 56, 789), "1234-12-31 12:34:56.789"), + (DateTime(1234, 12, 31, 12, 34, 56, 789), "1234-12-31 12:34:56.789 GMT") + ] + + suffix_data = [ + (Parsers.OK | Parsers.EOF, ""), + (Parsers.OK | Parsers.DELIMITED, ","), + (Parsers.OK | Parsers.NEWLINE, "\n,"), + (Parsers.OK | Parsers.NEWLINE | Parsers.EOF, "\n"), + ] + @testset "$input" for (expected, input) in dt_data + @testset "$(repr(suffix))" for (code, suffix) in suffix_data + res = Parsers.xparse(ChunkedCSV.GuessDateTime, input * suffix) + @test res.val == expected + @test res.code == code + end + end + end +end