11 changes: 4 additions & 7 deletions Project.toml
@@ -4,7 +4,6 @@ authors = ["Felix Cremer <[email protected]>, Daniel Loos <dloos@bgc-
version = "1.0.0-DEV"

[deps]
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
@@ -25,14 +24,13 @@ GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
ImageMorphology = "787d08f9-d448-5407-9aad-5290dd7ab264"
KML = "1284bf3a-1e3d-4f4e-a7a9-b9d235a28f35"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Proj = "c94c279d-25a6-4763-9509-64d165bea63e"
PyramidScheme = "ec211b67-1c2c-4319-878f-eaee078ee145"
Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689"
RecurrenceAnalysis = "639c3291-70d9-5ea2-8c5b-839eba1ee399"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
@@ -46,7 +44,6 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c"
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"

[compat]
AWSS3 = "0.10.4"
AllocCheck = "0.2.2"
Aqua = "0.8.13"
ArchGDAL = "0.10"
@@ -71,10 +68,10 @@ GeoFormatTypes = "0.4"
Glob = "1"
ImageMorphology = "0.4.5"
KML = "0.2"
LazyArtifacts = "1.11.0"
Libdl = "1.10"
LinearAlgebra = "1.10"
LoggingExtras = "1"
Minio = "0.2.2"
Missings = "1"
NetCDF = "0.12"
Pkg = "1.10"
@@ -93,7 +90,7 @@ TestItems = "1.0"
TimeseriesSurrogates = "2"
UnicodePlots = "3"
YAXArrayBase = "0.6, 0.7"
YAXArrays = "0.5, 0.6"
YAXArrays = "0.5, 0.6, 0.7"
Zarr = "0.9"
julia = "1.11"

@@ -102,13 +99,13 @@ AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

[targets]
test = ["Test", "TestItemRunner", "Pkg", "Random", "AllocCheck", "BenchmarkTools", "Aqua", "Documenter", "StaticTools", "PythonCall", "Libdl"]
2 changes: 0 additions & 2 deletions src/RQADeforestation.jl
@@ -8,7 +8,6 @@ using Zarr
using Distributed: myid
using NetCDF
using TestItems
using PyramidScheme: PyramidScheme
using DimensionalData: DimensionalData as DD

export gdalcube, rqatrend
@@ -18,7 +17,6 @@ include("auxil.jl")
include("rqatrend.jl")
include("analysis.jl") # TODO what is still needed from analysis now that rqatrend is in its own file?
include("cluster.jl")
include("timestats.jl")
include("main.jl")

end
87 changes: 54 additions & 33 deletions src/cluster.jl
@@ -7,78 +7,99 @@ using FillArrays
#maskfolder = "data/forestcompressed"

function meanvote(orbits, significance_thresh=-1.28)
s,n = 0.0,0
s, n = 0.0, 0
for i in eachindex(orbits)
if orbits[i] != 0 && !isnan(orbits[i])
s += orbits[i]
n += 1
end
end
m = s/n
m < significance_thresh ? 1 : 0
m = s / n
return m < significance_thresh ? 1 : 0
end

function filtersmallcomps!(xout,xin_unfiltered,forestmask,comborbits,connsize;dims=:,threaded=false)
xin = broadcast(xin_unfiltered,forestmask) do x,m
ismissing(m) ? zero(x) : x*m
function filtersmallcomps!(
xout, xin_unfiltered, forestmask, comborbits, connsize; dims=:, threaded=false
)
xin = broadcast(xin_unfiltered, forestmask) do x, m
ismissing(m) ? zero(x) : x * m
end
x = similar(Array{Float64},(axes(xin,1),axes(xin,2),Base.OneTo(1)))
for j in axes(x,2), i in axes(x,1)
x[i,j,1] = comborbits(view(xin,i,j,:))
x = similar(Array{Float64}, (axes(xin, 1), axes(xin, 2), Base.OneTo(1)))
for j in axes(x, 2), i in axes(x, 1)
x[i, j, 1] = comborbits(view(xin, i, j, :))
end
lc = label_components(x)
c = counter(lc)
for ix in eachindex(xout)
v = lc[ix]
if v==0 || c[v] < connsize
if v == 0 || c[v] < connsize
xout[ix] = 0
else
xout[ix] = 1
end
end
end

function postprocess(a,target_array::YAXArray,forestmask::YAXArray, orbitcombine=meanvote;minsize=30,max_cache=5e8)
nx,ny,nz = size(a)
windowsx = DAE.MovingWindow(1 - minsize,1,2*minsize + 1,nx,(1,nx))
windowsy = DAE.MovingWindow(1 - minsize,1,2*minsize + 1,ny,(1,ny))
function postprocess(
a::YAXArray,
target_array::YAXArray,
forestmask::YAXArray,
orbitcombine=meanvote;
minsize=30,
max_cache=5e8,
)
nx, ny, nz = size(a)
windowsx = DAE.MovingWindow(1 - minsize, 1, 2 * minsize + 1, nx, (1, nx))
windowsy = DAE.MovingWindow(1 - minsize, 1, 2 * minsize + 1, ny, (1, ny))
windowsz = [1:nz]
inar1 = DAE.InputArray(a.data,windows=(windowsx,windowsy,windowsz));
inar2 = DAE.InputArray(forestmask.data, windows=(windowsx,windowsy));
inars = (inar1,inar2)
outchunks = (target_array.chunks.chunks...,DAE.RegularChunks(1,0,1))
outars = DAE.create_outwindows((nx,ny,1),chunks=outchunks);
uf = DAE.create_userfunction(filtersmallcomps!,UInt8,is_blockfunction=true,is_mutating=true,args=(orbitcombine,minsize))
op = DAE.GMDWop(inars,(outars,),uf)
plan = DAE.optimize_loopranges(op,max_cache)
runner=DAE.LocalRunner(op,plan,(reshape(target_array.data,(nx,ny,1)),))
inar1 = DAE.InputArray(a.data; windows=(windowsx, windowsy, windowsz))
inar2 = DAE.InputArray(forestmask.data; windows=(windowsx, windowsy))
inars = (inar1, inar2)
outchunks = (target_array.chunks.chunks..., DAE.RegularChunks(1, 0, 1))
outars = DAE.create_outwindows((nx, ny, 1); chunks=outchunks)
uf = DAE.create_userfunction(
filtersmallcomps!,
UInt8;
is_blockfunction=true,
is_mutating=true,
args=(orbitcombine, minsize),
)
op = DAE.GMDWop(inars, (outars,), uf)
plan = DAE.optimize_loopranges(op, max_cache)
runner = DAE.LocalRunner(op, plan, (reshape(target_array.data, (nx, ny, 1)),))
run(runner)
target_array
return target_array
end

function postprocess(tile::AbstractString, indir, outpath, maskfolder)
if isdir(outpath)
return
return nothing
end

forpath = only(glob("*$tile*", maskfolder))
@show forpath
allfiles = readdir(indir, join=true)
allfiles = readdir(indir; join=true)
@show allfiles
orbitfiles = filter(x->occursin(tile,string(x)), allfiles)
orbitfiles = filter(x -> occursin(tile, string(x)), allfiles)
@show orbitfiles
orbits = filter(x->occursin(".zarr", string(x)), orbitfiles)
orbits = filter(x -> occursin(".zarr", string(x)), orbitfiles)
@show orbits
orbitname = map(o->split(basename(o),'_')[4],orbits)
orbitname = map(o -> split(basename(o), '_')[4], orbits)
d = DD.format(Dim{:orbit}(orbitname))
files = DD.DimArray(orbits,d)
files = DD.DimArray(orbits, d)
ds = open_mfdataset(string.(files))
nx, ny = size(ds.layer)
outds_skeleton = Dataset(;defo=YAXArray((ds.X,ds.Y),Fill(UInt8(0),nx,ny),chunks=DAE.GridChunks((nx,ny),(256,256))))
dsout = savedataset(outds_skeleton,path=outpath,skeleton=true,overwrite=true)
outds_skeleton = Dataset(;
defo=YAXArray(
(ds.X, ds.Y),
Fill(UInt8(0), nx, ny);
chunks=DAE.GridChunks((nx, ny), (256, 256)),
),
)
dsout = savedataset(outds_skeleton; path=outpath, skeleton=true, overwrite=true)
forest = Cube(forpath)
#masked = map(*, ds.layer, setchunks(forest,DiskArrays.eachchunk(ds.layer.chunks)))
@time postprocess(ds.layer,dsout.defo, forest)
@time postprocess(ds.layer, dsout.defo, forest)
@time PyramidScheme.buildpyramids(outpath)
end

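For orientation, a minimal sketch of driving the string-based postprocess entry point above for a single tile; the tile name and directories below are hypothetical placeholders, with maskfolder taken from the commented example at the top of the file:

# Hypothetical single-tile run: indir holds the per-orbit *_rqatrend_*.zarr stores,
# maskfolder the forest masks, and outpath receives the combined postprocess output.
tile = "E048N018T3"
indir = "results"
maskfolder = "data/forestcompressed"
outpath = joinpath(indir, "postprocess_$(tile).zarr/")
postprocess(tile, indir, outpath, maskfolder)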
98 changes: 20 additions & 78 deletions src/main.jl
@@ -1,16 +1,7 @@
using ArgParse
using YAXArrays: YAXDefaults
using ArchGDAL: ArchGDAL
using PyramidScheme
using AWSS3: global_aws_config, S3Path
using FilePathsBase: exists


#using YAXArrays, Zarr
using Minio: MinioConfig

global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1"))

const argparsesettings = ArgParseSettings()

ArgParse.parse_item(::Type{Date}, x::AbstractString) = Date(x)
@@ -78,121 +69,72 @@ function main(;
tiles::Vector{String},
continent::String,
indir::String,
outstore=Zarr.S3Store("europe-forest-change"),
outdir="results",
tempfolder = S3Path(outstore.bucket, "intermediates/"),
outdir="out.zarr",
start_date::Date,
end_date::Date,
polarisation="VH",
orbit="D",
threshold=3.0,
folders=["V1M0R1", "V1M1R1", "V1M1R2"],
stack=:dae,
postprocess=true,
forestdir="data/forest20m_new",
delete_intermediate=false
stack=:dae
)
#global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1",username="ufew8gJku5hRY7VD6jbEjRi8VnvDfeEv",password="dqZdzWCLB7a9gTshL29AnQWGqL3krwnS"))
in(orbit, ["A", "D"]) || error("Orbit needs to be either A or D")
if isdir(indir) && isempty(indir)
error("Input directory $indir must not be empty")
end

if isdir(tempfolder)
if isdir(outdir)
@warn "Resume from existing output directory"
else
mkdir(tempfolder, recursive=true)
mkdir(outdir)
@info "Write output to $outdir"
end

if monthday(start_date) != monthday(end_date)
@warn "Selected time series does not include a multiple of whole years. This might introduce seasonal bias."
end

YAXDefaults.workdir[] = tempfolder
@show typeof(tempfolder)
YAXDefaults.workdir[] = outdir

corruptedfiles = open("corrupted_tiles.txt", "w")
corruptedfiles = "corrupted_tiles.txt"
# TODO save the corrupt files to a txt for investigation
for tilefolder in tiles
@show tilefolder
outpath = joinpath(outdir, "postprocess_$tilefolder.zarr/")
@show outpath
if outpath in Zarr.subdirs(outstore, outdir)
println("Skip already processed tile $tilefolder")
continue
end
sub = first(folders)
#@show glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir)
filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders]
filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders]
allfilenames = collect(Iterators.flatten(filenamelist))
#@show allfilenames

relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames])
@show relorbits

for relorbit in relorbits
path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)"))
#s3path = "s3://"*joinpath(outstore.bucket, path)
@show path
exists(path * ".done") && continue
exists(path * "_zerotimesteps.done") && continue
filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)]
@time cube = gdalcube(filenames, stack)


path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)")
@show path
ispath(path * ".done") && continue
ispath(path * "_zerotimesteps.done") && continue

tcube = cube[Time=start_date .. end_date]
@show size(cube)
@show size(tcube)
if size(tcube, 3) == 0
touch(S3Path(path * "_zerotimesteps.done"))
touch(path * "_zerotimesteps.done")
continue
end
try
orbitoutpath = string(path * ".zarr/")
# This is only necessary because overwrite=true doesn't work on S3 based Zarr files in YAXArrays
# See https://github.com/JuliaDataCubes/YAXArrays.jl/issues/511
if exists(S3Path(orbitoutpath))
println("Deleting path $orbitoutpath")
rm(S3Path(orbitoutpath), recursive=true)
end
@show orbitoutpath
# This seems to ignore the overwrite keyword when the outpath point to S3.
@time rqatrend(tcube; thresh=threshold, outpath=orbitoutpath, overwrite=true)
if delete_intermediate == false
PyramidScheme.buildpyramids(orbitoutpath)
Zarr.consolidate_metadata(orbitoutpath)
end
outpath = path * ".zarr"
@time rqatrend(tcube; thresh=threshold, outpath=outpath, overwrite=true)
Zarr.consolidate_metadata(outpath)
catch e

if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError
println(corruptedfiles, "Found GDALError:")
println(corruptedfiles, e.captured.ex.msg)
println("Found GDALError:")
println(e.captured.ex.msg)
continue
else
rethrow(e)
end
end
donepath = path * ".done"
@show donepath
touch(S3Path(path * ".done"))
end
if postprocess
@show outpath
DD.STRICT_BROADCAST_CHECKS[] = false

RQADeforestation.postprocess(tilefolder, tempfolder, outpath, forestdir)
Zarr.consolidate_metadata(outpath)
DD.STRICT_BROADCAST_CHECKS[] = true
#base = basename(outpath)
#@show base
#command = `aws --endpoint-url http://s3.fairsendd.eodchosting.eu s3 cp --recursive $outpath s3://europe-forest-change/$base`

#run(command)
touch(path * ".done")
end
if delete_intermediate
rm(tempfolder, force=true, recursive=true)
end


end
end
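A minimal sketch of invoking the slimmed-down main with the keyword signature shown above; the tile, continent code, and directories are hypothetical values, and polarisation, orbit, and threshold simply repeat the defaults from the signature:

using Dates

# Hypothetical run over one tile, writing plain local Zarr output instead of S3.
main(;
    tiles=["E048N018T3"],
    continent="EU",
    indir="/data/sig0",
    outdir="out.zarr",
    start_date=Date(2018, 1, 1),
    end_date=Date(2022, 1, 1),
    polarisation="VH",
    orbit="D",
    threshold=3.0,
)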
1 change: 1 addition & 0 deletions test/runtests.jl
@@ -1,3 +1,4 @@
using LazyArtifacts
using RQADeforestation

# doctests do not run as testitem as of now, hence it is included here
3 changes: 2 additions & 1 deletion test/testdata.jl
@@ -1,6 +1,7 @@

@testitem "testdata main" begin
import Pkg: Artifacts.@artifact_str
using LazyArtifacts
testdatapath = artifact"rqatestdata/RQADeforestationTestData-2.0"

testdir = tempname()
@@ -55,4 +56,4 @@ end
copy!(ARGS, OLD_ARGS)

@test outdir |> readdir |> length > 1
end
end
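The test changes switch the test data download to LazyArtifacts, so the artifact is only fetched the first time the artifact string is evaluated. A minimal sketch of that pattern outside the test suite, assuming the package's Artifacts.toml declares the rqatestdata artifact and marks it lazy = true:

using LazyArtifacts   # brings @artifact_str into scope and enables on-demand downloads

# Resolves (and downloads, if missing) the lazy test data artifact on first use.
testdatapath = artifact"rqatestdata/RQADeforestationTestData-2.0"
@show readdir(testdatapath)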