diff --git a/src/analysis.jl b/src/analysis.jl index 7c4a757..b1c8131 100644 --- a/src/analysis.jl +++ b/src/analysis.jl @@ -1,6 +1,7 @@ using RecurrenceAnalysis: RecurrenceAnalysis as RA -#using MultipleTesting using Distances +import RecurrenceAnalysis: tau_recurrence + """ countvalid(xout, xin) @@ -11,22 +12,42 @@ function countvalid(xout, xin) xout .= count(!ismissing, xin) end -""" -countvalidag(xout, xin) - - Inner function to count the valid time steps in a datacube. - This function is aimed to be used inside of a mapCube call. -""" -function countvalidint(xout, xin) - xout .= count(x -> x != -9999, xin) -end """ countvalid(cube) Outer function to count the number of valid time steps in a cube. """ -countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=InDims("Time"), outdims=OutDims(; path)) +countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=InDims("Time", filter=YAXArrays.DAT.NoFilter()), outdims=OutDims(; path)) + +@testitem "countvalid cube" begin + using RQADeforestation + using YAXArrays + using Dates + using DimensionalData: Ti, X, Y + using Statistics + import Random + using Missings + Random.seed!(1234) + mock_axes = ( + Ti(Date("2022-01-01"):Day(1):Date("2022-01-30")), + X(range(1, 10, length=10)), + Y(range(1, 5, length=15)), + ) + mock_data = allowmissing(rand(30, 10, 15)) + mock_data[1:10,1,1] .= missing + mock_data[:, 2,1] .= missing + mock_data[[1,5,9], 2,2] .= missing + mock_props = Dict() + mock_cube = YAXArray(mock_axes, mock_data, mock_props) + + mock_count = RQADeforestation.countvalid(mock_cube) + @test mock_count.axes == (mock_cube.X, mock_cube.Y) + @test mock_count[1,1] == 20 + @test mock_count[1,2] == 30 + @test mock_count[2,2] == 27 + @test mock_count[2,1] == 0 +end """ rqatrend(xout, xin, thresh) @@ -35,9 +56,7 @@ Compute the RQA trend metric for the non-missing time steps of xin, and save it `thresh` specifies the epsilon threshold of the Recurrence Plot computation """ function rqatrend_recurrenceanalysis(pix_trend, pix, thresh=2) - #replace!(pix, -9999 => missing) ts = collect(skipmissing(pix)) - #@show length(ts) tau_pix = tau_recurrence(ts, thresh) pix_trend .= RA._trend(tau_pix) end @@ -49,6 +68,7 @@ function rqatrend_matrix(pix_trend, pix, thresh=2) pix_trend .= RA.trend(rm) end +#= """ rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300) Compute the RQA trend metric for shuffled time series of the data cube `cube` with the epsilon threshold `thresh` for `numshuffle` tries and save it into `path`. @@ -58,25 +78,9 @@ function rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle= # TODO this looks completely broken sg = surrogenerator(collect(eachindex(water[overlap])), BlockShuffle(7, shift=true)) end +=# -import RecurrenceAnalysis: tau_recurrence - -function tau_recurrence(ts::AbstractVector, thresh, metric=Euclidean()) - n = length(ts) - rr_τ = zeros(n) - for col in 1:n - for row in 1:(col-1) - d = evaluate(metric, ts[col], ts[row]) - #@show row, col, d - rr_τ[col-row+1] += d <= thresh - end - end - rr_τ[1] = n - rr_τ ./ (n:-1:1) - #rr_τ -end - """ anti_diagonal_density(ts, thresh, metric) Compute the average density of the diagonals perpendicular to the main diagonal for data series `ts`. @@ -110,6 +114,4 @@ function inner_postprocessing(rqadata, forestmask; threshold=-1.28, clustersize= x end end - #@time labeldata = MultipleTesting.label_components(rqathresh,trues(3,3)) - #@time clusterdata = MultipleTesting.maskcluster(rqathresh, labeldata, clustersize) end \ No newline at end of file diff --git a/src/auxil.jl b/src/auxil.jl index 8295f13..2da4998 100644 --- a/src/auxil.jl +++ b/src/auxil.jl @@ -4,7 +4,6 @@ using DiskArrays: DiskArrays, GridChunks using DiskArrayEngine: DiskArrayEngine as DAE using DimensionalData: DimensionalData as DD, X, Y using GeoFormatTypes -#using PyramidScheme: PyramidScheme as PS using Rasters: Raster import DiskArrays: readblock!, IrregularChunks, AbstractDiskArray using StatsBase: rle @@ -70,8 +69,15 @@ function DiskArrays.readblock!(b::BufferGDALBand, aout, r::AbstractUnitRange...) end function getdate(x, reg=r"[0-9]{8}T[0-9]{6}", df=dateformat"yyyymmddTHHMMSS") - m = match(reg, x).match - date = DateTime(m, df) + m = match(reg, x) + isnothing(m) && throw(ArgumentError("Did not find a datetime information in $x")) + date = DateTime(m.match, df) +end + +@testitem "getdate" begin + using Dates + @test RQADeforestation.getdate("sometext20200919T202020_somemoretext1234") == DateTime(2020,9,19, 20,20,20) + @test_throws Exception RQADeforestation.getdate("sometext") end """ @@ -110,6 +116,7 @@ function grouptimes(times, timediff=200000) return groups end + function stackindices(times, timediff=200000) @assert issorted(times) groups = zero(eachindex(times)) @@ -234,32 +241,6 @@ end end -""" -agcube(filenames) - Open the underlying tiff files via ArchGDAL. - This opens all files and keeps them open. - This has a higher upfront cost, but might lead to a speedup down the line when we access the data. -""" -function agcube(filenames::AbstractVector{<:AbstractString}) - dates = RQADeforestation.getdate.(filenames) - # Sort the dates and files by DateTime - p = sortperm(dates) - sdates = dates[p] - sfiles = filenames[p] - taxis = Ti(sdates) - datasets = AG.readraster.(sfiles) - yaxlist = YAXArray.(datasets) - return concatenatecubes(yaxlist, taxis) -end - -function netcdfify(path) - c = Cube(path) - npath = splitext(path)[1] * ".nc" - fl32 = map(x -> ismissing(x) ? x : Float32(x), c) - fl32.properties["_FillValue"] *= 1.0f0 - savecube(fl32, npath; compress=5) -end - const equi7crs = Dict( "AF" => ProjString("+proj=aeqd +lat_0=8.5 +lon_0=21.5 +x_0=5621452.01998 +y_0=5990638.42298 +datum=WGS84 +units=m +no_defs"), "AN" => ProjString("+proj=aeqd +lat_0=-90 +lon_0=0 +x_0=3714266.97719 +y_0=3402016.50625 +datum=WGS84 +units=m +no_defs"), @@ -268,77 +249,4 @@ const equi7crs = Dict( "NA" => ProjString("+proj=aeqd +lat_0=52 +lon_0=-97.5 +x_0=8264722.17686 +y_0=4867518.35323 +datum=WGS84 +units=m +no_defs"), "OC" => ProjString("+proj=aeqd +lat_0=-19.5 +lon_0=131.5 +x_0=6988408.5356 +y_0=7654884.53733 +datum=WGS84 +units=m +no_defs"), "SA" => ProjString("+proj=aeqd +lat_0=-14 +lon_0=-60.5 +x_0=7257179.23559 +y_0=5592024.44605 +datum=WGS84 +units=m +no_defs") -) - -# Auxillary functions for masking with the forest data - -function getsubtiles(tile) - east = eastint(tile) - north = northint(tile) - tiles = ["E$(lpad(e,3,"0"))N$(lpad(n, 3, "0"))T1" for e in east:(east+2), n in north:(north+2)] - return tiles -end - -eastint(tile) = parse(Int, tile[2:4]) -northint(tile) = parse(Int, tile[6:8]) - - - -function aggregate_forestry(tile) - subtiles = getsubtiles(tile) - foresttiles = [(parse.(Int, match(r"E(\d\d\d)N(\d\d\d)T1", t).captures)...,) => "/eodc/private/pangeojulia/ForestType/2017_FOREST-CLASSES_EU010M_$(t).tif" for t in subtiles] - filledtiles = filter(x -> isfile(last(x)), foresttiles) - if isempty(filledtiles) - return nothing - end - - - idx_to_fname = Dict(filledtiles...) - a = Cube(last(first(filledtiles))) - east = eastint(tile) - north = northint(tile) - f = ChunkedFillArray(a[1, 1], size(a), size.(DiskArrays.eachchunk(a)[1], 1)) - allarrs = [haskey(idx_to_fname, (x, y)) ? Cube(idx_to_fname[(x, y)]).data : f for x in east:(east+2), y in north:(north+2)] - - yaxs = Cube.(last.(filledtiles)) - #ext = Extents.union(yaxs...) - #tilex = Rasters._mosaic(first.(dims.(yaxs))...) - #tiley = Rasters._mosaic(last.(dims.(yaxs))...) - diskarray_merged = DiskArrayTools.ConcatDiskArray(allarrs) - - # We should first do the pyramid computation and then stitch non values along - - - #foryax = Cube.(filledtiles) - #forest = YAXArrays.Datasets.open_mfdataset(vec(foresttiles)) - - aggfor = [PS.gen_output(Union{Int8,Missing}, ceil.(Int, size(c) ./ 2)) for c in yaxs] - #a = aggfor[1] - #yax = foryax[1] - #PS.fill_pyramids(yax, a, x->sum(x) >0,true) - println("Start aggregating") - @time [PS.fill_pyramids(yaxs[i].data, aggfor[i], x -> count(!iszero, x) == 4 ? true : missing, true) for i in eachindex(yaxs)] - #tilepath = joinpath(indir, tile * suffix) - #aggyax = [Raster(aggfor[i][1][:,:,1], (PS.agg_axis(dims(yax,X), 2), PS.agg_axis(dims(yax, Y), 2))) for (i, yax) in enumerate(foryax)] - #ras = Raster(tilepath) - #allagg = ConcatDiskArray(only.(aggfor)[:,[3,2,1]]) - - #allagg = ConcatDiskArray(aggfor[:,[3,2,1]]) - #allagg = ConcatDiskArray(only.(aggfor)) - forras = Raster.(foresttiles, lazy=true) - xaxs = DD.dims.(forras[:, 1], X) - xaxsnew = [xax[begin:2:end] for xax in xaxs] - xax = vcat(xaxsnew...) - yaxs = DD.dims.(forras[1, :], Y) - yaxsnew = [yax[begin:2:end] for yax in yaxs] - yax = vcat(reverse(yaxsnew)...) - YAXArray((xax, yax), allagg[:, :, 1]) -end - -function maskforests(tilepath, outdir=".") - tile = match(r"E\d\d\dN\d\d\dT3", tilepath).match - forras = aggregate_forestry(tile) - ras = Raster(tilepath) - mras = forras .* ras - write(joinpath(outdir, "forestmasked_all" * tile * suffix), mras) -end \ No newline at end of file +) \ No newline at end of file diff --git a/src/forestdata.jl b/src/forestdata.jl new file mode 100644 index 0000000..f9bb0fc --- /dev/null +++ b/src/forestdata.jl @@ -0,0 +1,86 @@ + +# Auxillary functions for masking with the forest data +#= +function getsubtiles(tile) + east = eastint(tile) + north = northint(tile) + tiles = ["E$(lpad(e,3,"0"))N$(lpad(n, 3, "0"))T1" for e in east:(east+2), n in north:(north+2)] + return tiles +end + +eastint(tile) = parse(Int, tile[2:4]) +northint(tile) = parse(Int, tile[6:8]) + + + +function aggregate_forestry(tile) + subtiles = getsubtiles(tile) + foresttiles = [(parse.(Int, match(r"E(\d\d\d)N(\d\d\d)T1", t).captures)...,) => "/eodc/private/pangeojulia/ForestType/2017_FOREST-CLASSES_EU010M_$(t).tif" for t in subtiles] + filledtiles = filter(x -> isfile(last(x)), foresttiles) + if isempty(filledtiles) + return nothing + end + + + idx_to_fname = Dict(filledtiles...) + a = Cube(last(first(filledtiles))) + east = eastint(tile) + north = northint(tile) + f = ChunkedFillArray(a[1, 1], size(a), size.(DiskArrays.eachchunk(a)[1], 1)) + allarrs = [haskey(idx_to_fname, (x, y)) ? Cube(idx_to_fname[(x, y)]).data : f for x in east:(east+2), y in north:(north+2)] + + yaxs = Cube.(last.(filledtiles)) + #ext = Extents.union(yaxs...) + #tilex = Rasters._mosaic(first.(dims.(yaxs))...) + #tiley = Rasters._mosaic(last.(dims.(yaxs))...) + diskarray_merged = DiskArrayTools.ConcatDiskArray(allarrs) + + # We should first do the pyramid computation and then stitch non values along + + + #foryax = Cube.(filledtiles) + #forest = YAXArrays.Datasets.open_mfdataset(vec(foresttiles)) + + aggfor = [PS.gen_output(Union{Int8,Missing}, ceil.(Int, size(c) ./ 2)) for c in yaxs] + #a = aggfor[1] + #yax = foryax[1] + #PS.fill_pyramids(yax, a, x->sum(x) >0,true) + println("Start aggregating") + @time [PS.fill_pyramids(yaxs[i].data, aggfor[i], x -> count(!iszero, x) == 4 ? true : missing, true) for i in eachindex(yaxs)] + #tilepath = joinpath(indir, tile * suffix) + #aggyax = [Raster(aggfor[i][1][:,:,1], (PS.agg_axis(dims(yax,X), 2), PS.agg_axis(dims(yax, Y), 2))) for (i, yax) in enumerate(foryax)] + #ras = Raster(tilepath) + #allagg = ConcatDiskArray(only.(aggfor)[:,[3,2,1]]) + + #allagg = ConcatDiskArray(aggfor[:,[3,2,1]]) + #allagg = ConcatDiskArray(only.(aggfor)) + forras = Raster.(foresttiles, lazy=true) + xaxs = DD.dims.(forras[:, 1], X) + xaxsnew = [xax[begin:2:end] for xax in xaxs] + xax = vcat(xaxsnew...) + yaxs = DD.dims.(forras[1, :], Y) + yaxsnew = [yax[begin:2:end] for yax in yaxs] + yax = vcat(reverse(yaxsnew)...) + YAXArray((xax, yax), allagg[:, :, 1]) +end + +function maskforests(tilepath, outdir=".") + tile = match(r"E\d\d\dN\d\d\dT3", tilepath).match + forras = aggregate_forestry(tile) + ras = Raster(tilepath) + mras = forras .* ras + write(joinpath(outdir, "forestmasked_all" * tile * suffix), mras) +end + +""" + netcdfify(path) +Convert the raster data at `path` to netcdf by converting the data to Float32 and resaving. +""" +function netcdfify(path) + c = Cube(path) + npath = splitext(path)[1] * ".nc" + fl32 = map(x -> ismissing(x) ? x : Float32(x), c) + fl32.properties["_FillValue"] *= 1.0f0 + savecube(fl32, npath; compress=5) +end +=# \ No newline at end of file diff --git a/src/main.jl b/src/main.jl index 825991e..3d535f6 100644 --- a/src/main.jl +++ b/src/main.jl @@ -134,11 +134,6 @@ function main(; rethrow(e) end end - #=@everywhere begin - fname = "$(VERSION)_$(getpid())_$(time_ns()).heapsnapshot" - Profile.take_heap_snapshot(fname;streaming=true) - end - =# touch(path * ".done") end end