diff --git a/benchmark/cadence.jl b/benchmark/cadence.jl new file mode 100644 index 0000000..6e8700f --- /dev/null +++ b/benchmark/cadence.jl @@ -0,0 +1,45 @@ +using Chairmarks +using Dates +using SpaceDataModel.Times + +function slow_resolution(times; rtol = 1.0e-3, check = true) + dts = diff(times) + check && (all(d -> ≃(d, dts[1]; rtol), dts) || error("Data is not approximately uniformly sampled.")) + return dts[1] +end + +# Generate large approximately sampled times with small noise +function generate_approx_times(n, dt, noise_level = 1.0e-5) + base = range(0.0, step = dt, length = n) + return collect(base) .+ noise_level .* randn(n) +end + +function generate_approx_times2(n, dt, noise_level = 1.0e-5) + base = range(0.0, step = dt, length = n) + return 1.0e9 .* Nanosecond.(collect(base)) .+ round.(Int64, noise_level .* randn(n) .* 1.0e9) .* Nanosecond(1) +end + +# Benchmark suite +function run_resolution_benchmarks() + sizes = [10^3, 10^4, 10^5, 10^6] + dt = 1.0 + println("Resolution Benchmark") + println("="^60) + + for n in sizes + times_range = range(0.0, step = dt, length = n) + for f in [generate_approx_times, generate_approx_times2] + times = f(n, dt) + b_range = @b cadence($times_range) samples = 10 evals = 3 + b_check = @b cadence($times; check = true) samples = 10 evals = 3 + b_check2 = @b slow_resolution($times; check = true) samples = 10 evals = 3 + println("\nn = $(n):") + @info "b_check" b_check b_check2 b_range + end + end + + return +end + + +run_resolution_benchmarks() diff --git a/src/SpaceDataModel.jl b/src/SpaceDataModel.jl index c7bc29c..42bc9b2 100644 --- a/src/SpaceDataModel.jl +++ b/src/SpaceDataModel.jl @@ -27,4 +27,7 @@ include("workload.jl") include("variable_interface.jl") +include("times.jl") +using .Times + end diff --git a/src/times.jl b/src/times.jl new file mode 100644 index 0000000..c5d9aee --- /dev/null +++ b/src/times.jl @@ -0,0 +1,38 @@ +# Reference: https://github.com/JuliaAPlavin/DateFormats.jl +module Times +using Dates +using Dates: AbstractTime +export ≃, cadence + +/ₜ(x, n) = x / n +/ₜ(x::AbstractTime, n) = Nanosecond(round(Int64, Dates.tons(x) / n)) + +# workaround for `no method matching isapprox(::Nanosecond, ::Nanosecond)` +≃(x, y; kw...) = isapprox(x, y; kw...) +≃(x::AbstractTime, y; kw...) = isapprox(Dates.tons(x), Dates.tons(y); kw...) + +""" + cadence(times; rtol=1.0e-3, check=true) + cadence(T<:Real, times; rtol=1.0e-3, check=true) + +Return the time step of uniformly sampled `times`. + +If `check=true`, validates that samples are approximately uniform within `rtol`. +Pass a type `T<:Real` to return the cadence in seconds as that type. +""" +cadence(times::AbstractRange) = step(times) +function cadence(times; rtol = 1.0e-3, check = true) + N = length(times) + @assert N > 1 + dt0 = /ₜ(times[N] - times[1], N - 1) + check && @inbounds for i in 1:(N - 1) + dt = times[i + 1] - times[i] + @assert ≃(dt, dt0; rtol) "Data is not approximately uniformly sampled." + end + return dt0 +end +function cadence(T::Type{<:Real}, times; kw...) + dt = cadence(times; kw...) + return dt isa AbstractTime ? T(Dates.tons(dt) / 1.0e9) : T(dt) +end +end diff --git a/test/Project.toml b/test/Project.toml index 508d0cb..ef22268 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,6 +2,7 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CheckConcreteStructs = "73c92db5-9da6-4938-911a-6443a7e94a58" CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/test/times.jl b/test/times.jl new file mode 100644 index 0000000..e984441 --- /dev/null +++ b/test/times.jl @@ -0,0 +1,31 @@ +@testitem "cadence" begin + using Dates + using SpaceDataModel: cadence, ≃ + + # AbstractRange - returns step directly + @test cadence(1:10) == 1 + @test cadence(0.0:0.5:10.0) == 0.5 + @test cadence(Nanosecond(1):Nanosecond(1):Nanosecond(100)) == Nanosecond(1) + + # Vector{Float64} + times = collect(0.0:1.0:100.0) + @test cadence(times) == 1.0 + @test cadence(Float64, times) == 1.0 + + # Approximately uniform sampling + times_noisy = times .+ 1.0e-6 .* randn(length(times)) + @test ≃(cadence(times_noisy), 1.0; rtol = 1.0e-3) + + # Non-uniform sampling should error + times_bad = [0.0, 1.0, 3.0, 4.0] + @test_throws AssertionError cadence(times_bad) + @test cadence(times_bad; check = false) ≈ 4.0 / 3 + + # Dates support + dt_range = DateTime(2020):Hour(1):DateTime(2020, 1, 2) + @test cadence(dt_range) == Hour(1) + + ns_times = Nanosecond.(0:1_000_000_000:10_000_000_000) + @test cadence(ns_times) == Nanosecond(1_000_000_000) + @test cadence(Float64, ns_times) == 1.0 +end