Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "RQADeforestation"
uuid = "3f1d9318-18cc-4ffd-9937-04025ce33b74"
authors = ["Felix Cremer <[email protected]>, Daniel Loos <[email protected]> and contributors"]
version = "1.0.0-DEV"
authors = ["Felix Cremer <[email protected]>, Daniel Loos <[email protected]> and contributors"]

[deps]
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
Expand Down
98 changes: 29 additions & 69 deletions src/auxil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,16 @@ function DiskArrays.readblock!(a::LazyAggDiskArray, aout, i::UnitRange{Int}...)
for (j, it) in enumerate(itime)
arrays_now = a.arrays[a.inds[it]]
for ia in eachindex(arrays_now)
DiskArrays.readblock!(arrays_now[ia], view(buf, :, :, ia), i1, i2)
try
DiskArrays.readblock!(arrays_now[ia], view(buf, :, :, ia), i1, i2)
catch e
if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError
@warn e.captured.ex.msg
buf[:,:,ia] .= missing
else
rethrow(e)
end
end
end
vbuf = view(buf, :, :, 1:length(arrays_now))
map!(a.f, view(aout, :, :, j), eachslice(vbuf, dims=(1, 2)))
Expand Down Expand Up @@ -135,27 +144,7 @@ function stackindices(times, timediff=200000)
return groups
end

#=
function DiskArrays.readblock!(b::GDALBand, aout, r::AbstractUnitRange...)
if !isa(aout,Matrix)
aout2 = similar(aout)
AG.read(b.filename) do ds
AG.getband(ds, b.band) do bh
DiskArrays.readblock!(bh, aout2, r...)
end
end
aout .= aout2
else
AG.read(b.filename) do ds
AG.getband(ds, b.band) do bh
DiskArrays.readblock!(bh, aout, r...)
end
end
end
end
=#

function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:dae)
function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:lazyagg)
dates = getdate.(filenames)
@show length(dates)
# Sort the dates and files by DateTime
Expand All @@ -165,7 +154,6 @@ function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:dae)

#@show sdates
# Put the dates which are 200 seconds apart into groups
if stackgroups in [:dae, :lazyagg]
groupinds = grouptimes(sdates, 200000)
onefile = first(sfiles)
gd = backendlist[:gdal]
Expand All @@ -184,60 +172,32 @@ function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:dae)

cubelist = CFDiskArray.(group_gdbs, (gdbattrs,))
stackinds = stackindices(sdates)
aggdata = if stackgroups == :dae
gcube = diskstack(cubelist)
aggdata = DAE.aggregate_diskarray(gcube, mean ∘ skipmissing, (3 => stackinds,); strategy=:direct)
else
println("Construct lazy diskarray")
LazyAggDiskArray(mean ∘ skipmissing, cubelist, stackinds)
end
println("Construct lazy diskarray")
aggdata = LazyAggDiskArray(skipmissingmean, cubelist, stackinds)
# data = DiskArrays.ConcatDiskArray(reshape(groupcubes, (1,1,length(groupcubes))))
dates_grouped = [sdates[group[begin]] for group in groupinds]

taxis = DD.Ti(dates_grouped)
gcube = Cube(sfiles[1])
return YAXArray((DD.dims(gcube)[1:2]..., taxis), aggdata, gcube.properties,)
else
#datasets = AG.readraster.(sfiles)
taxis = DD.Ti(sdates)

onefile = first(sfiles)
gd = backendlist[:gdal]
yax1 = gd(onefile)
onecube = Cube(onefile)
#@show onecube.axes
gdb = get_var_handle(yax1, "Gray")
end

#@assert gdb isa GDALBand
all_gdbs = map(sfiles) do f
BufferGDALBand{eltype(gdb)}(f, gdb.band, gdb.size, gdb.attrs, gdb.cs, Dict{Int,AG.IRasterBand}())
end
stacked_gdbs = diskstack(all_gdbs)
attrs = copy(gdb.attrs)
#attrs["add_offset"] = Float16(attrs["add_offset"])
if haskey(attrs, "scale_factor")
attrs["scale_factor"] = Float16(attrs["scale_factor"])
end
all_cfs = CFDiskArray(stacked_gdbs, attrs)
return YAXArray((onecube.axes..., taxis), all_cfs, onecube.properties)
function skipmissingmean(x)
isempty(x) && return missing
s,n = reduce(x,init=(zero(eltype(x)),0)) do (s,n), ix
ismissing(ix) ? (s,n) : (s+ix,n+1)
end
#datasetgroups = [datasets[group] for group in groupinds]
#We have to save the vrts because the usage of nested vrts is not working as a rasterdataset
#temp = tempdir()
#outpaths = [joinpath(temp, splitext(basename(sfiles[group][1]))[1] * ".vrt") for group in groupinds]
#vrt_grouped = AG.unsafe_gdalbuildvrt.(datasetgroups)
#AG.write.(vrt_grouped, outpaths)
#vrt_grouped = AG.read.(outpaths)
#vrt_vv = AG.unsafe_gdalbuildvrt(vrt_grouped, ["-separate"])
#rvrt_vv = AG.RasterDataset(vrt_vv)
#yaxras = YAXArray.(sfiles)
#cube = concatenatecubes(yaxras, taxis)
#bandnames = AG.GDAL.gdalgetfilelist(vrt_vv.ptr)



# Set the timesteps from the bandnames as time axis
#dates_grouped = [sdates[group[begin]] for group in groupinds]
n==0 ? missing : s/n
end

# I don't dare to make this type piracy.
#Base.∘(::typeof(mean), ::typeof(skipmissing)) = skipmissingmean

@testitem "skipmissingmean" begin
@test ismissing(RQADeforestation.skipmissingmean(Float32[]))
@test ismissing(RQADeforestation.skipmissingmean(Union{Float32, Missing}[missing, missing]))
@test RQADeforestation.skipmissingmean([1,0,missing]) == 0.5
@test RQADeforestation.skipmissingmean([1,2,3]) == 2.0
end


Expand Down
57 changes: 44 additions & 13 deletions src/main.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using ArgParse
using YAXArrays: YAXDefaults
using FilePathsBase: exists, Path


const argparsesettings = ArgParseSettings()
Expand Down Expand Up @@ -66,7 +67,7 @@ end


function main(;
tiles::Vector{String},
tiles,
continent::String,
indir::String,
outdir="out.zarr",
Expand All @@ -76,8 +77,10 @@ function main(;
orbit="D",
threshold=3.0,
folders=["V1M0R1", "V1M1R1", "V1M1R2"],
stack=:dae
stack=:lazyagg,
delete_intermediate=false
)
outdir = Path(outdir)
in(orbit, ["A", "D"]) || error("Orbit needs to be either A or D")
if isdir(indir) && isempty(indir)
error("Input directory $indir must not be empty")
Expand All @@ -93,23 +96,24 @@ function main(;
@warn "Selected time series does not include a multiple of whole years. This might introduce seasonal bias."
end

YAXDefaults.workdir[] = outdir

corruptedfiles = "corrupted_tiles.txt"
# TODO save the corrupt files to a txt for investigation
for tilefolder in tiles
filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders]
allfilenames = collect(Iterators.flatten(filenamelist))

relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames])
@show relorbits

for relorbit in relorbits
filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)]
@time cube = gdalcube(filenames, stack)

path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)")
path = joinpath(outdir, "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)_$(start_date)_$(end_date)")
#s3path = "s3://"*joinpath(outstore.bucket, path)
@show path
exists(path * ".done") && continue
exists(path * "_zerotimesteps.done") && continue
filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)]
@time "cube construction" cube = gdalcube(filenames, stack)

ispath(path * ".done") && continue
ispath(path * "_zerotimesteps.done") && continue

Expand All @@ -121,14 +125,41 @@ function main(;
continue
end
try
outpath = path * ".zarr"
@time rqatrend(tcube; thresh=threshold, outpath=outpath, overwrite=true)
Zarr.consolidate_metadata(outpath)
orbitoutpath = path * ".zarr/"
# This is only necessary because overwrite=true doesn't work on S3 based Zarr files in YAXArrays
# See https://github.com/JuliaDataCubes/YAXArrays.jl/issues/511
if exists(orbitoutpath)
println("Deleting path $orbitoutpath")
rm(orbitoutpath, recursive=true)
end
@show orbitoutpath
# We save locally and then save a rechunked version in the cloud,
# because the chunking is suboptimal which we get from the automatic setting.
tmppath = tempname() * ".zarr"
@time "rqatrend" rqatrend(tcube; thresh=threshold, outpath=tmppath, overwrite=true)
c = Cube(tmppath)
@time "save to S3" savecube(setchunks(c, (15000,15000)), string(orbitoutpath))
rm(tmppath, recursive=true)
@show delete_intermediate
if delete_intermediate == false
#PyramidScheme.buildpyramids(orbitoutpath)
Zarr.consolidate_metadata(string(orbitoutpath))
end
catch e

println("inside catch")
if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError
println("Found GDALError:")
msg = e.captured.ex.msg
corruptfile = split(msg, " ")[1][1:end-1]
corrupt_parts = split(corruptfile, "_")
foldername = corrupt_parts[end-1]
continentfolder = corrupt_parts[end-2]
corruptpath = joinpath(indir, foldername, "EQUI7_$continentfolder", tilefolder, corruptfile)
println("Corrupted input file")
println(corruptpath)
println(joinpath(indir, ))
println(e.captured.ex.msg)
println(corruptedfiles, "Found GDALError:")
println(corruptedfiles, e.captured.ex.msg)
continue
else
rethrow(e)
Expand Down
42 changes: 33 additions & 9 deletions src/rqatrend.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ using Distances
"""rqatrend(cube;thresh=2, path=tempname() * ".zarr")

Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`.
`lowerbound` and `upperbound` are forwarded to the classification of the RQA Trend result.
"""
function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...)
@show outpath
mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=Float32, path=outpath, overwrite, kwargs...))
function rqatrend(cube; thresh=2, lowerbound=-5, upperbound=-0.5, outpath=tempname() * ".zarr", overwrite=false, kwargs...)
mapCube(rqatrend, cube, thresh, lowerbound, upperbound; indims=InDims("Time"), outdims=OutDims(; outtype=UInt8, path=outpath, fill_value=255, overwrite, kwargs...))
end

@testitem "rqatrend cube" begin
Expand All @@ -32,27 +32,51 @@ end
mock_trend = rqatrend(mock_cube; thresh=0.5)
@test mock_trend.axes == (mock_cube.X, mock_cube.Y)
diff = abs(mean(mock_trend))
@test diff < 0.5
@test diff < 254
end

"""rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr")

Compute the RQA trend metric for the data that is available on `path`.
See the `rqatrend` for a YAXArray for the description of the parameters.
"""
rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) =
rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...)
rqatrend(path::AbstractString; thresh=2, lowerbound=-5., upperbound=-0.5, outpath=tempname() * ".zarr", overwrite=false, kwargs...) =
rqatrend(Cube(path); thresh, lowerbound, upperbound, outpath, overwrite, kwargs...)


"""
rqatrend(xout, xin, thresh)

Compute the RQA trend metric for the non-missing time steps of xin, and save it to xout.
`thresh` specifies the epsilon threshold of the Recurrence Plot computation
`thresh` specifies the epsilon threshold of the Recurrence Plot computation.
`lowerbound` and `upperbound` are the bounds of the classification into UInt8.
The result of rqatrend are UInt8 values between 0 (no change) to 254 (definitive change) with 255 as sentinel value for missing data.
"""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should add to the docs that a value between 0 and 254 is returned and 255 corresponds to missing. Also, the kwargs lowerbound and upperbound are not documented

function rqatrend(pix_trend, pix, thresh=2)
pix_trend .= rqatrend_impl(pix; thresh)
function rqatrend(pix_trend, pix, thresh=2, lowerbound=-5., upperbound=-0.5)
pix_trend .= classify_rqatrend(rqatrend_impl(pix; thresh); lowerbound, upperbound)
end

"""
classify_rqatrend(trend; lowerbound=Float32(-5.0), upperbound=Float32(-0.5)))
Classify the rqatrend and put it into 254 bins so that they can fit into a UInt8 encoding.
This is a compromise between data storage and accuracy of the change detection.
The value range is 0 (no change) to 254 (definitive change) with 255 kept free as a Sentinel value for missing data.
"""
function classify_rqatrend(trend; lowerbound=Float32(-5.0), upperbound=Float32(-0.5))
isnan(trend) && return UInt8(255)
ctrend = clamp(trend, lowerbound, upperbound)
rlength = upperbound - lowerbound
return round(UInt8, 254-((ctrend - lowerbound) / rlength) * 254)
end

@testitem "classify_rqatrend" begin
import AllocCheck
@test RQADeforestation.classify_rqatrend(-4.999) === UInt8(254)
@test RQADeforestation.classify_rqatrend(1) === UInt8(0)
@test RQADeforestation.classify_rqatrend(-0.52) === UInt8(1)
@test RQADeforestation.classify_rqatrend(-6) === UInt8(254)
@test isempty( AllocCheck.check_allocs(RQADeforestation.classify_rqatrend, (Float32,)))
end

function rqatrend_impl(data; thresh=2, border=10, theiler=1, metric=CheckedEuclidean())
# simplified implementation of https://stats.stackexchange.com/a/370175 and https://github.com/joshday/OnlineStats.jl/blob/b89a99679b13e3047ff9c93a03c303c357931832/src/stats/linreg.jl
Expand Down
13 changes: 13 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,16 @@ doctest(RQADeforestation)

using TestItemRunner
@run_package_tests


@testitem "rqa step function" begin
using RQADeforestation
using Distributions: Normal as DNormal
x = range(0,100,length=1000)
ts2 = zero(x)
ts2[1:div(end,2)] .= rand.(DNormal(3,1))
ts2[div(end,2):end] .= rand.(DNormal(0,1))
pixtrend = UInt8[255]
RQADeforestation.rqatrend(pixtrend, ts2)
@test pixtrend[] < 20
end
12 changes: 7 additions & 5 deletions test/testdata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
@testitem "testdata main" begin
import Pkg: Artifacts.@artifact_str
using LazyArtifacts
using FilePathsBase
testdatapath = artifact"rqatestdata/RQADeforestationTestData-2.0"

testdir = tempname()
rm(testdir, recursive=true, force=true)
mkpath(testdir)
outdir = "$testdir/out.zarr"
outdir = "$testdir/out"
indir = "$testdir/in"
cp(testdatapath, indir)

Expand All @@ -20,13 +21,14 @@
indir=indir,
start_date=Date("2021-01-01"),
end_date=Date("2022-01-01"),
outdir=outdir
outdir=Path(outdir),
stack=:lazyagg,
)
a = open_dataset(outdir * "/E051N018T3_rqatrend_VH_D022_thresh_3.0.zarr").layer
a = open_dataset(joinpath(outdir, "E051N018T3_rqatrend_VH_D022_thresh_3.0_2021-01-01_2022-01-01.zarr")).layer

@test size(a) == (50, 74)
@test minimum(a) < 0
@test maximum(a) > 0
@test minimum(a) == 0
@test maximum(a) > 200
end

@testitem "testdata julia_main" begin
Expand Down