From b5cd34cdc7263181b8fa4dd36fd833133db4c58b Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Mon, 17 Nov 2025 15:59:25 +0100 Subject: [PATCH 01/14] Replace everything by missing if gdalfile throws error and rechunk the data This will allow to read Sentinel-1 data even though one single time step is not available. --- src/auxil.jl | 21 +++++++++++++++++++-- src/main.jl | 32 ++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/auxil.jl b/src/auxil.jl index 9a6ac1c..d164912 100644 --- a/src/auxil.jl +++ b/src/auxil.jl @@ -34,7 +34,16 @@ function DiskArrays.readblock!(a::LazyAggDiskArray, aout, i::UnitRange{Int}...) for (j, it) in enumerate(itime) arrays_now = a.arrays[a.inds[it]] for ia in eachindex(arrays_now) - DiskArrays.readblock!(arrays_now[ia], view(buf, :, :, ia), i1, i2) + try + DiskArrays.readblock!(arrays_now[ia], view(buf, :, :, ia), i1, i2) + catch e + if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError + @warn e.captured.ex.msg + buf[:,:,ia] .= missing + else + rethrow(e) + end + end end vbuf = view(buf, :, :, 1:length(arrays_now)) map!(a.f, view(aout, :, :, j), eachslice(vbuf, dims=(1, 2))) @@ -189,7 +198,7 @@ function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:dae) aggdata = DAE.aggregate_diskarray(gcube, mean ∘ skipmissing, (3 => stackinds,); strategy=:direct) else println("Construct lazy diskarray") - LazyAggDiskArray(mean ∘ skipmissing, cubelist, stackinds) + LazyAggDiskArray(skipmissingmean, cubelist, stackinds) end # data = DiskArrays.ConcatDiskArray(reshape(groupcubes, (1,1,length(groupcubes)))) dates_grouped = [sdates[group[begin]] for group in groupinds] @@ -240,6 +249,14 @@ function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:dae) #dates_grouped = [sdates[group[begin]] for group in groupinds] end +function skipmissingmean(x) + isempty(x) && return missing + s,n = reduce(x,init=(zero(eltype(x)),0)) do (s,n), ix + ismissing(ix) ? (s,n) : (s+ix,n+1) + end + n==0 ? missing : s/n +end +#Base.∘(::typeof(mean), ::typeof(skipmissing)) = skipmissingmean const equi7crs = Dict( "AF" => ProjString("+proj=aeqd +lat_0=8.5 +lon_0=21.5 +x_0=5621452.01998 +y_0=5990638.42298 +datum=WGS84 +units=m +no_defs"), diff --git a/src/main.jl b/src/main.jl index c09626c..a88c83d 100644 --- a/src/main.jl +++ b/src/main.jl @@ -75,7 +75,7 @@ end function main(; - tiles::Vector{String}, + tiles, continent::String, indir::String, outstore=Zarr.S3Store("europe-forest-change"), @@ -125,18 +125,18 @@ function main(; #@show glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders] allfilenames = collect(Iterators.flatten(filenamelist)) - #@show allfilenames + @show length(allfilenames) relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames]) @show relorbits for relorbit in relorbits - path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)")) + path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)_$(start_date)_$(end_date)")) #s3path = "s3://"*joinpath(outstore.bucket, path) @show path exists(path * ".done") && continue exists(path * "_zerotimesteps.done") && continue filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)] - @time cube = gdalcube(filenames, stack) + @time "cube construction" cube = gdalcube(filenames, stack) @@ -156,15 +156,31 @@ function main(; rm(S3Path(orbitoutpath), recursive=true) end @show orbitoutpath - # This seems to ignore the overwrite keyword when the outpath point to S3. - @time rqatrend(tcube; thresh=threshold, outpath=orbitoutpath, overwrite=true) + # We save locally and then save a rechunked version in the cloud, + # because the chunking is suboptimal which we get from the automatic setting. + tmppath = tempname() * ".zarr" + @time "rqatrend" rqatrend(tcube; thresh=threshold, outpath=tmppath, overwrite=true) + c = Cube(tmppath) + @time "save to S3" savecube(setchunks(c, (15000,15000)), orbitoutpath) + rm(tmppath, recursive=true) + @show delete_intermediate if delete_intermediate == false - PyramidScheme.buildpyramids(orbitoutpath) + #PyramidScheme.buildpyramids(orbitoutpath) Zarr.consolidate_metadata(orbitoutpath) end catch e - + println("inside catch") if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError + msg = e.captured.ex.msg + corruptfile = split(msg, " ")[1][1:end-1] + corrupt_parts = split(corruptfile, "_") + foldername = corrupt_parts[end-1] + continentfolder = corrupt_parts[end-2] + corruptpath = joinpath(indir, foldername, "EQUI7_$continentfolder", tilefolder, corruptfile) + println("Corrupted input file") + println(corruptpath) + println(joinpath(indir, )) + println(e.captured.ex.msg) println(corruptedfiles, "Found GDALError:") println(corruptedfiles, e.captured.ex.msg) continue From d0ed431ebe8172ecd0d886f872ab14e47a75bf31 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Mon, 17 Nov 2025 15:59:36 +0100 Subject: [PATCH 02/14] Remove timestats.jl include --- src/RQADeforestation.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/RQADeforestation.jl b/src/RQADeforestation.jl index 0e81a71..8e024c1 100644 --- a/src/RQADeforestation.jl +++ b/src/RQADeforestation.jl @@ -18,7 +18,6 @@ include("auxil.jl") include("rqatrend.jl") include("analysis.jl") # TODO what is still needed from analysis now that rqatrend is in its own file? include("cluster.jl") -include("timestats.jl") include("main.jl") end \ No newline at end of file From a07b9be74c60d96c97d411708c6270d5a7dc87b6 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Mon, 17 Nov 2025 16:00:08 +0100 Subject: [PATCH 03/14] Add classifier for RQA trend With this we can save the RQA trend data in UInt8 data and can apply the threshold on that. --- src/rqatrend.jl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/rqatrend.jl b/src/rqatrend.jl index 2ef3f2d..fbac7f5 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -8,7 +8,7 @@ using Distances Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`. """ function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) - mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=UInt8, path=outpath, overwrite, kwargs...)) + mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=UInt8, path=outpath, fill_value=255, overwrite, kwargs...)) end @testitem "rqatrend cube" begin @@ -52,10 +52,16 @@ function rqatrend(pix_trend, pix, thresh=2, lowerbound=-5., upperbound=-0.5) pix_trend .= classify_rqatrend(rqatrend_impl(pix; thresh); lowerbound, upperbound) end +""" + classify_rqatrend(trend; lowerbound=Float32(-5.0), upperbound=Float32(-0.5))) +Classify the rqatrend and put it into 254 bins so that they can fit into a UInt8 encoding. +This is a compromise between data storage and accuracy of the change detection. +""" function classify_rqatrend(trend; lowerbound=Float32(-5.0), upperbound=Float32(-0.5)) + isnan(trend) && return UInt8(255) ctrend = clamp(trend, lowerbound, upperbound) rlength = upperbound - lowerbound - return round(UInt8, 255-((ctrend - lowerbound) / rlength) * 255) + return round(UInt8, 254-((ctrend - lowerbound) / rlength) * 254) end @testitem "classify_rqatrend" begin From c74864e2331629e091f668c85d9eb9028214ba2e Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Mon, 17 Nov 2025 16:18:05 +0100 Subject: [PATCH 04/14] Tidy up merge --- src/main.jl | 21 --------------------- src/rqatrend.jl | 8 ++++++++ 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/src/main.jl b/src/main.jl index 4c01cc7..e9783ef 100644 --- a/src/main.jl +++ b/src/main.jl @@ -100,16 +100,10 @@ function main(; for tilefolder in tiles filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders] allfilenames = collect(Iterators.flatten(filenamelist)) -<<<<<<< HEAD - @show length(allfilenames) -======= - ->>>>>>> main relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames]) @show relorbits for relorbit in relorbits -<<<<<<< HEAD path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)_$(start_date)_$(end_date)")) #s3path = "s3://"*joinpath(outstore.bucket, path) @show path @@ -118,10 +112,6 @@ function main(; filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)] @time "cube construction" cube = gdalcube(filenames, stack) -======= - filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)] - @time cube = gdalcube(filenames, stack) ->>>>>>> main path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)") @show path @@ -136,7 +126,6 @@ function main(; continue end try -<<<<<<< HEAD orbitoutpath = string(path * ".zarr/") # This is only necessary because overwrite=true doesn't work on S3 based Zarr files in YAXArrays # See https://github.com/JuliaDataCubes/YAXArrays.jl/issues/511 @@ -157,15 +146,9 @@ function main(; #PyramidScheme.buildpyramids(orbitoutpath) Zarr.consolidate_metadata(orbitoutpath) end -======= - outpath = path * ".zarr" - @time rqatrend(tcube; thresh=threshold, outpath=outpath, overwrite=true) - Zarr.consolidate_metadata(outpath) ->>>>>>> main catch e println("inside catch") if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError -<<<<<<< HEAD msg = e.captured.ex.msg corruptfile = split(msg, " ")[1][1:end-1] corrupt_parts = split(corruptfile, "_") @@ -178,10 +161,6 @@ function main(; println(e.captured.ex.msg) println(corruptedfiles, "Found GDALError:") println(corruptedfiles, e.captured.ex.msg) -======= - println("Found GDALError:") - println(e.captured.ex.msg) ->>>>>>> main continue else rethrow(e) diff --git a/src/rqatrend.jl b/src/rqatrend.jl index 0aa0603..cf4db3b 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -64,6 +64,14 @@ function classify_rqatrend(trend; lowerbound=Float32(-5.0), upperbound=Float32(- return round(UInt8, 254-((ctrend - lowerbound) / rlength) * 254) end +@testitem "classify_rqatrend" begin + import AllocCheck + @test RQADeforestation.classify_rqatrend(-4.999) === UInt8(255) + @test RQADeforestation.classify_rqatrend(1) === UInt8(0) + @test RQADeforestation.classify_rqatrend(-0.52) === UInt8(1) + @test RQADeforestation.classify_rqatrend(-6) === UInt8(255) + @test isempty( AllocCheck.check_allocs(RQADeforestation.classify_rqatrend, (Float32,))) +end function rqatrend_impl(data; thresh=2, border=10, theiler=1, metric=CheckedEuclidean()) # simplified implementation of https://stats.stackexchange.com/a/370175 and https://github.com/joshday/OnlineStats.jl/blob/b89a99679b13e3047ff9c93a03c303c357931832/src/stats/linreg.jl From 409999176492f64ff84695aafaf747a47b8de24a Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 09:55:49 +0100 Subject: [PATCH 05/14] Add AWSS3 dependency --- Project.toml | 2 ++ src/main.jl | 1 + 2 files changed, 3 insertions(+) diff --git a/Project.toml b/Project.toml index 4f683b0..a62d9c2 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Felix Cremer , Daniel Loos Date: Tue, 18 Nov 2025 15:08:36 +0100 Subject: [PATCH 06/14] Use Path consistently in main --- src/main.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/main.jl b/src/main.jl index 15f584d..3502d9c 100644 --- a/src/main.jl +++ b/src/main.jl @@ -1,6 +1,6 @@ using ArgParse using YAXArrays: YAXDefaults -using AWSS3: S3Path +using FilePathsBase: exists, Path const argparsesettings = ArgParseSettings() @@ -77,8 +77,10 @@ function main(; orbit="D", threshold=3.0, folders=["V1M0R1", "V1M1R1", "V1M1R2"], - stack=:dae + stack=:dae, + delete_intermediate=false ) + outdir = Path(outdir) in(orbit, ["A", "D"]) || error("Orbit needs to be either A or D") if isdir(indir) && isempty(indir) error("Input directory $indir must not be empty") @@ -94,7 +96,6 @@ function main(; @warn "Selected time series does not include a multiple of whole years. This might introduce seasonal bias." end - YAXDefaults.workdir[] = outdir corruptedfiles = "corrupted_tiles.txt" # TODO save the corrupt files to a txt for investigation @@ -105,7 +106,7 @@ function main(; @show relorbits for relorbit in relorbits - path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)_$(start_date)_$(end_date)")) + path = joinpath(outdir, "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)_$(start_date)_$(end_date)") #s3path = "s3://"*joinpath(outstore.bucket, path) @show path exists(path * ".done") && continue @@ -114,7 +115,7 @@ function main(; @time "cube construction" cube = gdalcube(filenames, stack) - path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)") + path = joinpath(outdir, "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)") @show path ispath(path * ".done") && continue ispath(path * "_zerotimesteps.done") && continue @@ -127,12 +128,12 @@ function main(; continue end try - orbitoutpath = string(path * ".zarr/") + orbitoutpath = path * ".zarr/" # This is only necessary because overwrite=true doesn't work on S3 based Zarr files in YAXArrays # See https://github.com/JuliaDataCubes/YAXArrays.jl/issues/511 - if exists(S3Path(orbitoutpath)) + if exists(orbitoutpath) println("Deleting path $orbitoutpath") - rm(S3Path(orbitoutpath), recursive=true) + rm(orbitoutpath, recursive=true) end @show orbitoutpath # We save locally and then save a rechunked version in the cloud, @@ -140,12 +141,12 @@ function main(; tmppath = tempname() * ".zarr" @time "rqatrend" rqatrend(tcube; thresh=threshold, outpath=tmppath, overwrite=true) c = Cube(tmppath) - @time "save to S3" savecube(setchunks(c, (15000,15000)), orbitoutpath) + @time "save to S3" savecube(setchunks(c, (15000,15000)), string(orbitoutpath)) rm(tmppath, recursive=true) @show delete_intermediate if delete_intermediate == false #PyramidScheme.buildpyramids(orbitoutpath) - Zarr.consolidate_metadata(orbitoutpath) + Zarr.consolidate_metadata(string(orbitoutpath)) end catch e println("inside catch") From f3b3839aa7728356cf74f8dcd1abf8708d45f1f5 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 15:09:38 +0100 Subject: [PATCH 07/14] Fix tests --- src/rqatrend.jl | 6 +++--- test/runtests.jl | 13 +++++++++++++ test/testdata.jl | 7 ++++--- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/rqatrend.jl b/src/rqatrend.jl index cf4db3b..9835ad9 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -31,7 +31,7 @@ end mock_trend = rqatrend(mock_cube; thresh=0.5) @test mock_trend.axes == (mock_cube.X, mock_cube.Y) diff = abs(mean(mock_trend)) - @test diff < 0.5 + @test diff < 254 end """rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr") @@ -66,10 +66,10 @@ end @testitem "classify_rqatrend" begin import AllocCheck - @test RQADeforestation.classify_rqatrend(-4.999) === UInt8(255) + @test RQADeforestation.classify_rqatrend(-4.999) === UInt8(254) @test RQADeforestation.classify_rqatrend(1) === UInt8(0) @test RQADeforestation.classify_rqatrend(-0.52) === UInt8(1) - @test RQADeforestation.classify_rqatrend(-6) === UInt8(255) + @test RQADeforestation.classify_rqatrend(-6) === UInt8(254) @test isempty( AllocCheck.check_allocs(RQADeforestation.classify_rqatrend, (Float32,))) end diff --git a/test/runtests.jl b/test/runtests.jl index 968e1c9..2a9782d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,3 +10,16 @@ doctest(RQADeforestation) using TestItemRunner @run_package_tests + + +@testitem "rqa step function" begin + using RQADeforestation + using Distributions: Normal as DNormal + x = range(0,100,length=1000) + ts2 = zero(x) + ts2[1:div(end,2)] .= rand.(DNormal(3,1)) + ts2[div(end,2):end] .= rand.(DNormal(0,1)) + pixtrend = UInt8[255] + RQADeforestation.rqatrend(pixtrend, ts2) + @test pixtrend[] < 20 +end \ No newline at end of file diff --git a/test/testdata.jl b/test/testdata.jl index 078f78a..711e353 100644 --- a/test/testdata.jl +++ b/test/testdata.jl @@ -2,6 +2,7 @@ @testitem "testdata main" begin import Pkg: Artifacts.@artifact_str using LazyArtifacts + using FilePathsBase testdatapath = artifact"rqatestdata/RQADeforestationTestData-2.0" testdir = tempname() @@ -20,13 +21,13 @@ indir=indir, start_date=Date("2021-01-01"), end_date=Date("2022-01-01"), - outdir=outdir + outdir=Path(outdir) ) a = open_dataset(outdir * "/E051N018T3_rqatrend_VH_D022_thresh_3.0.zarr").layer @test size(a) == (50, 74) - @test minimum(a) < 0 - @test maximum(a) > 0 + @test minimum(a) == 0 + @test maximum(a) > 200 end @testitem "testdata julia_main" begin From f781d002cf64990df145b2c46190a56d0c700991 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 15:12:07 +0100 Subject: [PATCH 08/14] Remove AWSS3 This is actually not needed, because we are trying to keep the Path type and don't have to recast it. --- Project.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index a62d9c2..9df7b12 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,9 @@ name = "RQADeforestation" uuid = "3f1d9318-18cc-4ffd-9937-04025ce33b74" -authors = ["Felix Cremer , Daniel Loos and contributors"] version = "1.0.0-DEV" +authors = ["Felix Cremer , Daniel Loos and contributors"] [deps] -AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" @@ -45,7 +44,6 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" [compat] -AWSS3 = "0.10.4" AllocCheck = "0.2.2" Aqua = "0.8.13" ArchGDAL = "0.10" From 49e206c475578c636144f364ea20ff17a39a837c Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 15:16:55 +0100 Subject: [PATCH 09/14] Add test for skipmissingmean --- src/auxil.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/auxil.jl b/src/auxil.jl index d164912..1d392a3 100644 --- a/src/auxil.jl +++ b/src/auxil.jl @@ -256,8 +256,18 @@ function skipmissingmean(x) end n==0 ? missing : s/n end + +# I don't dare to make this type piracy. #Base.∘(::typeof(mean), ::typeof(skipmissing)) = skipmissingmean +@testitem "skipmissingmean" begin + @test ismissing(RQADeforestation.skipmissingmean(Float32[])) + @test ismissing(RQADeforestation.skipmissingmean(Union{Float32, Missing}[missing, missing])) + @test RQADeforestation.skipmissingmean([1,0,missing]) == 0.5 + @test RQADeforestation.skipmissingmean([1,2,3]) == 2.0 +end + + const equi7crs = Dict( "AF" => ProjString("+proj=aeqd +lat_0=8.5 +lon_0=21.5 +x_0=5621452.01998 +y_0=5990638.42298 +datum=WGS84 +units=m +no_defs"), "AN" => ProjString("+proj=aeqd +lat_0=-90 +lon_0=0 +x_0=3714266.97719 +y_0=3402016.50625 +datum=WGS84 +units=m +no_defs"), From ede4a0bd1837833f8f02dd97894fa763dfdc983b Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 16:36:22 +0100 Subject: [PATCH 10/14] Remove duplicate path definition in main --- src/main.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main.jl b/src/main.jl index 3502d9c..32bc406 100644 --- a/src/main.jl +++ b/src/main.jl @@ -114,9 +114,6 @@ function main(; filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)] @time "cube construction" cube = gdalcube(filenames, stack) - - path = joinpath(outdir, "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)") - @show path ispath(path * ".done") && continue ispath(path * "_zerotimesteps.done") && continue From d78cfbff7ca0501333847d811b3ad93da6a10354 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 16:36:36 +0100 Subject: [PATCH 11/14] Improve docstrings in rqatrend --- src/rqatrend.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/rqatrend.jl b/src/rqatrend.jl index 9835ad9..b2dcb3b 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -6,9 +6,10 @@ using Distances """rqatrend(cube;thresh=2, path=tempname() * ".zarr") Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`. +`lowerbound` and `upperbound` are forwarded to the classification of the RQA Trend result. """ -function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) - mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=UInt8, path=outpath, fill_value=255, overwrite, kwargs...)) +function rqatrend(cube; thresh=2, lowerbound=-5, upperbound=-0.5, outpath=tempname() * ".zarr", overwrite=false, kwargs...) + mapCube(rqatrend, cube, thresh, lowerbound, upperbound; indims=InDims("Time"), outdims=OutDims(; outtype=UInt8, path=outpath, fill_value=255, overwrite, kwargs...)) end @testitem "rqatrend cube" begin @@ -37,16 +38,19 @@ end """rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr") Compute the RQA trend metric for the data that is available on `path`. +See the `rqatrend` for a YAXArray for the description of the parameters. """ -rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = - rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...) +rqatrend(path::AbstractString; thresh=2, lowerbound=-5., upperbound=-0.5, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = + rqatrend(Cube(path); thresh, lowerbound, upperbound, outpath, overwrite, kwargs...) """ rqatrend(xout, xin, thresh) Compute the RQA trend metric for the non-missing time steps of xin, and save it to xout. -`thresh` specifies the epsilon threshold of the Recurrence Plot computation +`thresh` specifies the epsilon threshold of the Recurrence Plot computation. +`lowerbound` and `upperbound` are the bounds of the classification into UInt8. +The result of rqatrend are UInt8 values between 0 (no change) to 254 (definitive change) with 255 as sentinel value for missing data. """ function rqatrend(pix_trend, pix, thresh=2, lowerbound=-5., upperbound=-0.5) pix_trend .= classify_rqatrend(rqatrend_impl(pix; thresh); lowerbound, upperbound) @@ -56,6 +60,7 @@ end classify_rqatrend(trend; lowerbound=Float32(-5.0), upperbound=Float32(-0.5))) Classify the rqatrend and put it into 254 bins so that they can fit into a UInt8 encoding. This is a compromise between data storage and accuracy of the change detection. +The value range is 0 (no change) to 254 (definitive change) with 255 kept free as a Sentinel value for missing data. """ function classify_rqatrend(trend; lowerbound=Float32(-5.0), upperbound=Float32(-0.5)) isnan(trend) && return UInt8(255) From a4bb4dea16959c064754681e9221797503db022d Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 16:41:19 +0100 Subject: [PATCH 12/14] Fix main test with actual data --- test/testdata.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testdata.jl b/test/testdata.jl index 711e353..2254eff 100644 --- a/test/testdata.jl +++ b/test/testdata.jl @@ -8,7 +8,7 @@ testdir = tempname() rm(testdir, recursive=true, force=true) mkpath(testdir) - outdir = "$testdir/out.zarr" + outdir = "$testdir/out" indir = "$testdir/in" cp(testdatapath, indir) @@ -23,7 +23,7 @@ end_date=Date("2022-01-01"), outdir=Path(outdir) ) - a = open_dataset(outdir * "/E051N018T3_rqatrend_VH_D022_thresh_3.0.zarr").layer + a = open_dataset(joinpath(outdir, "E051N018T3_rqatrend_VH_D022_thresh_3.0_2021-01-01_2022-01-01.zarr")).layer @test size(a) == (50, 74) @test minimum(a) == 0 From 03fc8930db4e3cf56a04c0568a8a223c89e6f1c8 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 17:09:24 +0100 Subject: [PATCH 13/14] Remove old unused loading path --- src/auxil.jl | 63 +--------------------------------------------------- 1 file changed, 1 insertion(+), 62 deletions(-) diff --git a/src/auxil.jl b/src/auxil.jl index 1d392a3..813109c 100644 --- a/src/auxil.jl +++ b/src/auxil.jl @@ -144,27 +144,7 @@ function stackindices(times, timediff=200000) return groups end -#= -function DiskArrays.readblock!(b::GDALBand, aout, r::AbstractUnitRange...) - if !isa(aout,Matrix) - aout2 = similar(aout) - AG.read(b.filename) do ds - AG.getband(ds, b.band) do bh - DiskArrays.readblock!(bh, aout2, r...) - end - end - aout .= aout2 - else - AG.read(b.filename) do ds - AG.getband(ds, b.band) do bh - DiskArrays.readblock!(bh, aout, r...) - end - end - end -end -=# - -function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:dae) +function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:lazyagg) dates = getdate.(filenames) @show length(dates) # Sort the dates and files by DateTime @@ -206,47 +186,6 @@ function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:dae) taxis = DD.Ti(dates_grouped) gcube = Cube(sfiles[1]) return YAXArray((DD.dims(gcube)[1:2]..., taxis), aggdata, gcube.properties,) - else - #datasets = AG.readraster.(sfiles) - taxis = DD.Ti(sdates) - - onefile = first(sfiles) - gd = backendlist[:gdal] - yax1 = gd(onefile) - onecube = Cube(onefile) - #@show onecube.axes - gdb = get_var_handle(yax1, "Gray") - - #@assert gdb isa GDALBand - all_gdbs = map(sfiles) do f - BufferGDALBand{eltype(gdb)}(f, gdb.band, gdb.size, gdb.attrs, gdb.cs, Dict{Int,AG.IRasterBand}()) - end - stacked_gdbs = diskstack(all_gdbs) - attrs = copy(gdb.attrs) - #attrs["add_offset"] = Float16(attrs["add_offset"]) - if haskey(attrs, "scale_factor") - attrs["scale_factor"] = Float16(attrs["scale_factor"]) - end - all_cfs = CFDiskArray(stacked_gdbs, attrs) - return YAXArray((onecube.axes..., taxis), all_cfs, onecube.properties) - end - #datasetgroups = [datasets[group] for group in groupinds] - #We have to save the vrts because the usage of nested vrts is not working as a rasterdataset - #temp = tempdir() - #outpaths = [joinpath(temp, splitext(basename(sfiles[group][1]))[1] * ".vrt") for group in groupinds] - #vrt_grouped = AG.unsafe_gdalbuildvrt.(datasetgroups) - #AG.write.(vrt_grouped, outpaths) - #vrt_grouped = AG.read.(outpaths) - #vrt_vv = AG.unsafe_gdalbuildvrt(vrt_grouped, ["-separate"]) - #rvrt_vv = AG.RasterDataset(vrt_vv) - #yaxras = YAXArray.(sfiles) - #cube = concatenatecubes(yaxras, taxis) - #bandnames = AG.GDAL.gdalgetfilelist(vrt_vv.ptr) - - - - # Set the timesteps from the bandnames as time axis - #dates_grouped = [sdates[group[begin]] for group in groupinds] end function skipmissingmean(x) From 0a6f42893d486532edd2390290fa0d0a7edc4a04 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 18 Nov 2025 17:22:35 +0100 Subject: [PATCH 14/14] Remove aggregate_diskarray way as well --- src/auxil.jl | 10 ++-------- src/main.jl | 2 +- test/testdata.jl | 3 ++- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/auxil.jl b/src/auxil.jl index 813109c..0d968ee 100644 --- a/src/auxil.jl +++ b/src/auxil.jl @@ -154,7 +154,6 @@ function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:lazy #@show sdates # Put the dates which are 200 seconds apart into groups - if stackgroups in [:dae, :lazyagg] groupinds = grouptimes(sdates, 200000) onefile = first(sfiles) gd = backendlist[:gdal] @@ -173,13 +172,8 @@ function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:lazy cubelist = CFDiskArray.(group_gdbs, (gdbattrs,)) stackinds = stackindices(sdates) - aggdata = if stackgroups == :dae - gcube = diskstack(cubelist) - aggdata = DAE.aggregate_diskarray(gcube, mean ∘ skipmissing, (3 => stackinds,); strategy=:direct) - else - println("Construct lazy diskarray") - LazyAggDiskArray(skipmissingmean, cubelist, stackinds) - end + println("Construct lazy diskarray") + aggdata = LazyAggDiskArray(skipmissingmean, cubelist, stackinds) # data = DiskArrays.ConcatDiskArray(reshape(groupcubes, (1,1,length(groupcubes)))) dates_grouped = [sdates[group[begin]] for group in groupinds] diff --git a/src/main.jl b/src/main.jl index 32bc406..7a9cb03 100644 --- a/src/main.jl +++ b/src/main.jl @@ -77,7 +77,7 @@ function main(; orbit="D", threshold=3.0, folders=["V1M0R1", "V1M1R1", "V1M1R2"], - stack=:dae, + stack=:lazyagg, delete_intermediate=false ) outdir = Path(outdir) diff --git a/test/testdata.jl b/test/testdata.jl index 2254eff..0e8f3c2 100644 --- a/test/testdata.jl +++ b/test/testdata.jl @@ -21,7 +21,8 @@ indir=indir, start_date=Date("2021-01-01"), end_date=Date("2022-01-01"), - outdir=Path(outdir) + outdir=Path(outdir), + stack=:lazyagg, ) a = open_dataset(joinpath(outdir, "E051N018T3_rqatrend_VH_D022_thresh_3.0_2021-01-01_2022-01-01.zarr")).layer