66 changes: 34 additions & 32 deletions src/analysis.jl
@@ -1,6 +1,7 @@
using RecurrenceAnalysis: RecurrenceAnalysis as RA
#using MultipleTesting
using Distances
import RecurrenceAnalysis: tau_recurrence

"""
countvalid(xout, xin)

@@ -11,22 +12,42 @@ function countvalid(xout, xin)
xout .= count(!ismissing, xin)
end

"""
countvalidag(xout, xin)

Inner function to count the valid time steps in a datacube.
This function is aimed to be used inside of a mapCube call.
"""
function countvalidint(xout, xin)
xout .= count(x -> x != -9999, xin)
end
"""
countvalid(cube)

Outer function to count the number of valid time steps in a cube.
"""
countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=InDims("Time"), outdims=OutDims(; path))
countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=InDims("Time", filter=YAXArrays.DAT.NoFilter()), outdims=OutDims(; path))
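# The explicit NoFilter presumably keeps mapCube from skipping all-missing series,
# so that pixels without any valid observation get an explicit count of 0
# (exercised by the last @test below).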

@testitem "countvalid cube" begin
using RQADeforestation
using YAXArrays
using Dates
using DimensionalData: Ti, X, Y
using Statistics
import Random
using Missings
Random.seed!(1234)

mock_axes = (
Ti(Date("2022-01-01"):Day(1):Date("2022-01-30")),
X(range(1, 10, length=10)),
Y(range(1, 5, length=15)),
)
mock_data = allowmissing(rand(30, 10, 15))
mock_data[1:10,1,1] .= missing
mock_data[:, 2,1] .= missing
mock_data[[1,5,9], 2,2] .= missing
mock_props = Dict()
mock_cube = YAXArray(mock_axes, mock_data, mock_props)

mock_count = RQADeforestation.countvalid(mock_cube)
@test mock_count.axes == (mock_cube.X, mock_cube.Y)
@test mock_count[1,1] == 20
@test mock_count[1,2] == 30
@test mock_count[2,2] == 27
@test mock_count[2,1] == 0
end

"""
rqatrend(xout, xin, thresh)
@@ -35,9 +56,7 @@ Compute the RQA trend metric for the non-missing time steps of xin, and save it
`thresh` specifies the epsilon threshold of the Recurrence Plot computation
"""
function rqatrend_recurrenceanalysis(pix_trend, pix, thresh=2)
#replace!(pix, -9999 => missing)
ts = collect(skipmissing(pix))
#@show length(ts)
tau_pix = tau_recurrence(ts, thresh)
pix_trend .= RA._trend(tau_pix)
end
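# A minimal sketch of calling the kernel directly, outside of mapCube; the
# synthetic series and the threshold 0.5 are illustrative values only:
#
#     using Missings: allowmissing
#     pix = allowmissing(sin.(0.1 .* (1:200)) .+ 0.01 .* (1:200))
#     pix[10:20] .= missing        # gaps, as in a real observation series
#     pix_trend = zeros(1)         # output buffer, written via .=
#     rqatrend_recurrenceanalysis(pix_trend, pix, 0.5)
#     pix_trend[1]                 # the resulting RQA trend value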
@@ -49,6 +68,7 @@ function rqatrend_matrix(pix_trend, pix, thresh=2)
pix_trend .= RA.trend(rm)
end

#=
"""
rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300)
Compute the RQA trend metric for shuffled time series of the data cube `cube` with the epsilon threshold `thresh` for `numshuffle` tries and save it into `path`.
@@ -58,25 +78,9 @@ function rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=
# TODO this looks completely broken
sg = surrogenerator(collect(eachindex(water[overlap])), BlockShuffle(7, shift=true))
end
=#


import RecurrenceAnalysis: tau_recurrence

function tau_recurrence(ts::AbstractVector, thresh, metric=Euclidean())
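# rr_τ[τ+1] accumulates, for each diagonal offset τ = col - row, the number of
# point pairs closer than `thresh`; the final division by n:-1:1 (the diagonal
# lengths) turns these counts into per-diagonal recurrence rates, with
# rr_τ[1] = n because every point recurs with itself on the main diagonal.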
n = length(ts)
rr_τ = zeros(n)
for col in 1:n
for row in 1:(col-1)
d = evaluate(metric, ts[col], ts[row])
#@show row, col, d
rr_τ[col-row+1] += d <= thresh
end
end
rr_τ[1] = n
rr_τ ./ (n:-1:1)
#rr_τ
end

"""
anti_diagonal_density(ts, thresh, metric)
Compute the average density of the diagonals perpendicular to the main diagonal for data series `ts`.
@@ -110,6 +114,4 @@ function inner_postprocessing(rqadata, forestmask; threshold=-1.28, clustersize=
x
end
end
#@time labeldata = MultipleTesting.label_components(rqathresh,trues(3,3))
#@time clusterdata = MultipleTesting.maskcluster(rqathresh, labeldata, clustersize)
end
114 changes: 11 additions & 103 deletions src/auxil.jl
@@ -4,7 +4,6 @@ using DiskArrays: DiskArrays, GridChunks
using DiskArrayEngine: DiskArrayEngine as DAE
using DimensionalData: DimensionalData as DD, X, Y
using GeoFormatTypes
#using PyramidScheme: PyramidScheme as PS
using Rasters: Raster
import DiskArrays: readblock!, IrregularChunks, AbstractDiskArray
using StatsBase: rle
@@ -70,8 +69,15 @@ function DiskArrays.readblock!(b::BufferGDALBand, aout, r::AbstractUnitRange...)
end

function getdate(x, reg=r"[0-9]{8}T[0-9]{6}", df=dateformat"yyyymmddTHHMMSS")
m = match(reg, x).match
date = DateTime(m, df)
m = match(reg, x)
isnothing(m) && throw(ArgumentError("Did not find datetime information in $x"))
date = DateTime(m.match, df)
end

@testitem "getdate" begin
using Dates
@test RQADeforestation.getdate("sometext20200919T202020_somemoretext1234") == DateTime(2020,9,19, 20,20,20)
@test_throws Exception RQADeforestation.getdate("sometext")
end

"""
@@ -110,6 +116,7 @@ function grouptimes(times, timediff=200000)
return groups
end


function stackindices(times, timediff=200000)
@assert issorted(times)
groups = zero(eachindex(times))
@@ -234,32 +241,6 @@ end
end


"""
agcube(filenames)
Open the underlying tiff files via ArchGDAL.
This opens all files and keeps them open.
This has a higher upfront cost, but might lead to a speedup down the line when we access the data.
"""
function agcube(filenames::AbstractVector{<:AbstractString})
dates = RQADeforestation.getdate.(filenames)
# Sort the dates and files by DateTime
p = sortperm(dates)
sdates = dates[p]
sfiles = filenames[p]
taxis = Ti(sdates)
datasets = AG.readraster.(sfiles)
yaxlist = YAXArray.(datasets)
return concatenatecubes(yaxlist, taxis)
end

function netcdfify(path)
c = Cube(path)
npath = splitext(path)[1] * ".nc"
fl32 = map(x -> ismissing(x) ? x : Float32(x), c)
fl32.properties["_FillValue"] *= 1.0f0
savecube(fl32, npath; compress=5)
end

const equi7crs = Dict(
"AF" => ProjString("+proj=aeqd +lat_0=8.5 +lon_0=21.5 +x_0=5621452.01998 +y_0=5990638.42298 +datum=WGS84 +units=m +no_defs"),
"AN" => ProjString("+proj=aeqd +lat_0=-90 +lon_0=0 +x_0=3714266.97719 +y_0=3402016.50625 +datum=WGS84 +units=m +no_defs"),
@@ -268,77 +249,4 @@
"NA" => ProjString("+proj=aeqd +lat_0=52 +lon_0=-97.5 +x_0=8264722.17686 +y_0=4867518.35323 +datum=WGS84 +units=m +no_defs"),
"OC" => ProjString("+proj=aeqd +lat_0=-19.5 +lon_0=131.5 +x_0=6988408.5356 +y_0=7654884.53733 +datum=WGS84 +units=m +no_defs"),
"SA" => ProjString("+proj=aeqd +lat_0=-14 +lon_0=-60.5 +x_0=7257179.23559 +y_0=5592024.44605 +datum=WGS84 +units=m +no_defs")
)

# Auxiliary functions for masking with the forest data

function getsubtiles(tile)
east = eastint(tile)
north = northint(tile)
tiles = ["E$(lpad(e,3,"0"))N$(lpad(n, 3, "0"))T1" for e in east:(east+2), n in north:(north+2)]
return tiles
end

eastint(tile) = parse(Int, tile[2:4])
northint(tile) = parse(Int, tile[6:8])



function aggregate_forestry(tile)
subtiles = getsubtiles(tile)
foresttiles = [(parse.(Int, match(r"E(\d\d\d)N(\d\d\d)T1", t).captures)...,) => "/eodc/private/pangeojulia/ForestType/2017_FOREST-CLASSES_EU010M_$(t).tif" for t in subtiles]
filledtiles = filter(x -> isfile(last(x)), foresttiles)
if isempty(filledtiles)
return nothing
end


idx_to_fname = Dict(filledtiles...)
a = Cube(last(first(filledtiles)))
east = eastint(tile)
north = northint(tile)
f = ChunkedFillArray(a[1, 1], size(a), size.(DiskArrays.eachchunk(a)[1], 1))
allarrs = [haskey(idx_to_fname, (x, y)) ? Cube(idx_to_fname[(x, y)]).data : f for x in east:(east+2), y in north:(north+2)]

yaxs = Cube.(last.(filledtiles))
#ext = Extents.union(yaxs...)
#tilex = Rasters._mosaic(first.(dims.(yaxs))...)
#tiley = Rasters._mosaic(last.(dims.(yaxs))...)
diskarray_merged = DiskArrayTools.ConcatDiskArray(allarrs)

# We should first do the pyramid computation and then stitch the tiles together afterwards


#foryax = Cube.(filledtiles)
#forest = YAXArrays.Datasets.open_mfdataset(vec(foresttiles))

aggfor = [PS.gen_output(Union{Int8,Missing}, ceil.(Int, size(c) ./ 2)) for c in yaxs]
#a = aggfor[1]
#yax = foryax[1]
#PS.fill_pyramids(yax, a, x->sum(x) >0,true)
println("Start aggregating")
@time [PS.fill_pyramids(yaxs[i].data, aggfor[i], x -> count(!iszero, x) == 4 ? true : missing, true) for i in eachindex(yaxs)]
#tilepath = joinpath(indir, tile * suffix)
#aggyax = [Raster(aggfor[i][1][:,:,1], (PS.agg_axis(dims(yax,X), 2), PS.agg_axis(dims(yax, Y), 2))) for (i, yax) in enumerate(foryax)]
#ras = Raster(tilepath)
#allagg = ConcatDiskArray(only.(aggfor)[:,[3,2,1]])

#allagg = ConcatDiskArray(aggfor[:,[3,2,1]])
#allagg = ConcatDiskArray(only.(aggfor))
forras = Raster.(foresttiles, lazy=true)
xaxs = DD.dims.(forras[:, 1], X)
xaxsnew = [xax[begin:2:end] for xax in xaxs]
xax = vcat(xaxsnew...)
yaxs = DD.dims.(forras[1, :], Y)
yaxsnew = [yax[begin:2:end] for yax in yaxs]
yax = vcat(reverse(yaxsnew)...)
YAXArray((xax, yax), allagg[:, :, 1])
end

function maskforests(tilepath, outdir=".")
tile = match(r"E\d\d\dN\d\d\dT3", tilepath).match
forras = aggregate_forestry(tile)
ras = Raster(tilepath)
mras = forras .* ras
write(joinpath(outdir, "forestmasked_all" * tile * suffix), mras)
end
)
86 changes: 86 additions & 0 deletions src/forestdata.jl
@@ -0,0 +1,86 @@

# Auxiliary functions for masking with the forest data
#=
function getsubtiles(tile)
east = eastint(tile)
north = northint(tile)
tiles = ["E$(lpad(e,3,"0"))N$(lpad(n, 3, "0"))T1" for e in east:(east+2), n in north:(north+2)]
return tiles
end

eastint(tile) = parse(Int, tile[2:4])
northint(tile) = parse(Int, tile[6:8])



function aggregate_forestry(tile)
subtiles = getsubtiles(tile)
foresttiles = [(parse.(Int, match(r"E(\d\d\d)N(\d\d\d)T1", t).captures)...,) => "/eodc/private/pangeojulia/ForestType/2017_FOREST-CLASSES_EU010M_$(t).tif" for t in subtiles]
filledtiles = filter(x -> isfile(last(x)), foresttiles)
if isempty(filledtiles)
return nothing
end


idx_to_fname = Dict(filledtiles...)
a = Cube(last(first(filledtiles)))
east = eastint(tile)
north = northint(tile)
f = ChunkedFillArray(a[1, 1], size(a), size.(DiskArrays.eachchunk(a)[1], 1))
allarrs = [haskey(idx_to_fname, (x, y)) ? Cube(idx_to_fname[(x, y)]).data : f for x in east:(east+2), y in north:(north+2)]

yaxs = Cube.(last.(filledtiles))
#ext = Extents.union(yaxs...)
#tilex = Rasters._mosaic(first.(dims.(yaxs))...)
#tiley = Rasters._mosaic(last.(dims.(yaxs))...)
diskarray_merged = DiskArrayTools.ConcatDiskArray(allarrs)

# We should first do the pyramid computation and then stitch the tiles together afterwards


#foryax = Cube.(filledtiles)
#forest = YAXArrays.Datasets.open_mfdataset(vec(foresttiles))

aggfor = [PS.gen_output(Union{Int8,Missing}, ceil.(Int, size(c) ./ 2)) for c in yaxs]
#a = aggfor[1]
#yax = foryax[1]
#PS.fill_pyramids(yax, a, x->sum(x) >0,true)
println("Start aggregating")
@time [PS.fill_pyramids(yaxs[i].data, aggfor[i], x -> count(!iszero, x) == 4 ? true : missing, true) for i in eachindex(yaxs)]
#tilepath = joinpath(indir, tile * suffix)
#aggyax = [Raster(aggfor[i][1][:,:,1], (PS.agg_axis(dims(yax,X), 2), PS.agg_axis(dims(yax, Y), 2))) for (i, yax) in enumerate(foryax)]
#ras = Raster(tilepath)
#allagg = ConcatDiskArray(only.(aggfor)[:,[3,2,1]])

#allagg = ConcatDiskArray(aggfor[:,[3,2,1]])
#allagg = ConcatDiskArray(only.(aggfor))
forras = Raster.(foresttiles, lazy=true)
xaxs = DD.dims.(forras[:, 1], X)
xaxsnew = [xax[begin:2:end] for xax in xaxs]
xax = vcat(xaxsnew...)
yaxs = DD.dims.(forras[1, :], Y)
yaxsnew = [yax[begin:2:end] for yax in yaxs]
yax = vcat(reverse(yaxsnew)...)
YAXArray((xax, yax), allagg[:, :, 1])
end

function maskforests(tilepath, outdir=".")
tile = match(r"E\d\d\dN\d\d\dT3", tilepath).match
forras = aggregate_forestry(tile)
ras = Raster(tilepath)
mras = forras .* ras
write(joinpath(outdir, "forestmasked_all" * tile * suffix), mras)
end

"""
netcdfify(path)
Convert the raster data at `path` to NetCDF by casting the data to Float32 and saving it again.
"""
function netcdfify(path)
c = Cube(path)
npath = splitext(path)[1] * ".nc"
fl32 = map(x -> ismissing(x) ? x : Float32(x), c)
fl32.properties["_FillValue"] *= 1.0f0
savecube(fl32, npath; compress=5)
end
=#
5 changes: 0 additions & 5 deletions src/main.jl
@@ -134,11 +134,6 @@ function main(;
rethrow(e)
end
end
#=@everywhere begin
fname = "$(VERSION)_$(getpid())_$(time_ns()).heapsnapshot"
Profile.take_heap_snapshot(fname;streaming=true)
end
=#
touch(path * ".done")
end
end