Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ authors = ["Felix Cremer <[email protected]>, Daniel Loos <dloos@bgc-
version = "1.0.0-DEV"

[deps]
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
DiskArrayEngine = "2d4b2e14-ccd6-4284-b8b0-2378ace7c126"
Expand All @@ -16,18 +18,22 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Extents = "411431e0-e8b7-467b-b5e0-f676ba4f2910"
FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
GDAL = "add2ef01-049f-52c4-9ee2-e494f65e021a"
GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
ImageMorphology = "787d08f9-d448-5407-9aad-5290dd7ab264"
KML = "1284bf3a-1e3d-4f4e-a7a9-b9d235a28f35"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Proj = "c94c279d-25a6-4763-9509-64d165bea63e"
PyramidScheme = "ec211b67-1c2c-4319-878f-eaee078ee145"
Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689"
RecurrenceAnalysis = "639c3291-70d9-5ea2-8c5b-839eba1ee399"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Expand All @@ -41,12 +47,14 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c"
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"

[compat]
AWSS3 = "0.10.4"
AllocCheck = "0.2.2"
Aqua = "0.8.13"
ArchGDAL = "0.10"
ArgParse = "1"
BenchmarkTools = "1.6.0"
ConstructionBase = "1"
DataStructures = "0.18.22"
Dates = "1.10"
DimensionalData = "0.29"
DiskArrayEngine = "0.1, 0.2"
Expand All @@ -57,15 +65,18 @@ Distributed = "1.10"
Distributions = "0.25"
Documenter = "1.12.0"
Extents = "0.1"
FilePathsBase = "0.9.24"
FillArrays = "1"
GDAL = "1"
GeoFormatTypes = "0.4"
Glob = "1"
ImageMorphology = "0.4.5"
KML = "0.2"
LazyArtifacts = "1.11.0"
Libdl = "1.10"
LinearAlgebra = "1.10"
LoggingExtras = "1"
Minio = "0.2.2"
Missings = "1"
NetCDF = "0.12"
Pkg = "1.10"
Expand Down
4 changes: 4 additions & 0 deletions src/RQADeforestation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@ using Zarr
using Distributed: myid
using NetCDF
using TestItems
using PyramidScheme: PyramidScheme
using DimensionalData: DimensionalData as DD

export gdalcube, rqatrend

include("metrics.jl")
include("auxil.jl")
include("rqatrend.jl")
include("analysis.jl") # TODO what is still needed from analysis now that rqatrend is in its own file?
include("cluster.jl")
include("timestats.jl")
include("main.jl")

end
102 changes: 102 additions & 0 deletions src/cluster.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
using ImageMorphology, Zarr, YAXArrays
import DimensionalData as DD
import DiskArrayEngine as DAE
using DataStructures: counter
using Statistics
using FillArrays
#maskfolder = "data/forestcompressed"

"""
    meanvote(orbits, significance_thresh=-1.28)

Combine per-orbit detection values for one pixel by averaging all entries
that are neither zero nor NaN, and vote `1` (deforested) when that mean
falls below `significance_thresh`, otherwise `0`.

If no entry qualifies the mean is `NaN` (0/0), the comparison is false,
and the vote is `0`.
"""
function meanvote(orbits, significance_thresh=-1.28)
    total = 0.0
    nvalid = 0
    for val in orbits
        # Zeros are fill values and NaNs are invalid; both are excluded from the mean.
        if val != 0 && !isnan(val)
            total += val
            nvalid += 1
        end
    end
    return total / nvalid < significance_thresh ? 1 : 0
end

"""
    filtersmallcomps!(xout, xin_unfiltered, forestmask, comborbits, connsize; dims=:, threaded=false)

Block function (DiskArrayEngine calling convention) that writes a binary
deforestation map into `xout`: a pixel is `1` only when it belongs to a
connected component of combined-orbit detections with at least `connsize`
pixels, and `0` otherwise.

`xin_unfiltered` is a 3-d (x, y, orbit) block, `forestmask` the matching 2-d
mask (missing entries zero the input), and `comborbits` collapses the orbit
dimension per pixel. `dims` and `threaded` are accepted for the block-function
interface but unused here.
"""
function filtersmallcomps!(xout, xin_unfiltered, forestmask, comborbits, connsize; dims=:, threaded=false)
    # Apply the forest mask; missing mask values suppress the pixel entirely.
    masked = broadcast(xin_unfiltered, forestmask) do value, maskval
        ismissing(maskval) ? zero(value) : value * maskval
    end
    # Collapse the orbit (third) dimension into one vote per pixel.
    votes = similar(Array{Float64}, (axes(masked, 1), axes(masked, 2), Base.OneTo(1)))
    for col in axes(votes, 2), row in axes(votes, 1)
        votes[row, col, 1] = comborbits(view(masked, row, col, :))
    end
    # Label connected nonzero regions and count the pixels in each label.
    labels = label_components(votes)
    sizes = counter(labels)
    for idx in eachindex(xout)
        lbl = labels[idx]
        # Background (label 0) and components smaller than `connsize` are dropped.
        xout[idx] = (lbl == 0 || sizes[lbl] < connsize) ? 0 : 1
    end
end

"""
    postprocess(a, target_array::YAXArray, forestmask::YAXArray, orbitcombine=meanvote; minsize=30, max_cache=5e8)

Run the small-component filter over the full 3-d detection array `a`
(x, y, orbit) with DiskArrayEngine, writing the binary result into
`target_array`, which is returned.

`forestmask` masks out non-forest pixels and `orbitcombine` collapses the
orbit dimension per pixel. `minsize` is both the minimum connected-component
size to keep and the halo width of the moving windows, so components near
chunk borders are labelled consistently. `max_cache` is the cache budget in
bytes handed to `DAE.optimize_loopranges`.
"""
function postprocess(a,target_array::YAXArray,forestmask::YAXArray, orbitcombine=meanvote;minsize=30,max_cache=5e8)
nx,ny,nz = size(a)
# Moving windows with a halo of `minsize` pixels on each side in x and y;
# the orbit dimension is consumed whole in a single window.
windowsx = DAE.MovingWindow(1 - minsize,1,2*minsize + 1,nx,(1,nx))
windowsy = DAE.MovingWindow(1 - minsize,1,2*minsize + 1,ny,(1,ny))
windowsz = [1:nz]
inar1 = DAE.InputArray(a.data,windows=(windowsx,windowsy,windowsz));
# The mask is 2-d, so it only gets the x/y windows.
inar2 = DAE.InputArray(forestmask.data, windows=(windowsx,windowsy));
inars = (inar1,inar2)
# Output keeps the target's x/y chunking plus a trailing singleton dimension
# to match the block function's 3-d output shape.
outchunks = (target_array.chunks.chunks...,DAE.RegularChunks(1,0,1))
outars = DAE.create_outwindows((nx,ny,1),chunks=outchunks);
# filtersmallcomps! mutates its output block in place and works on whole blocks.
uf = DAE.create_userfunction(filtersmallcomps!,UInt8,is_blockfunction=true,is_mutating=true,args=(orbitcombine,minsize))
op = DAE.GMDWop(inars,(outars,),uf)
plan = DAE.optimize_loopranges(op,max_cache)
runner=DAE.LocalRunner(op,plan,(reshape(target_array.data,(nx,ny,1)),))
run(runner)
target_array
end

"""
    postprocess(tile::AbstractString, indir, outpath, maskfolder)

Post-process all per-orbit RQA trend results for one `tile`: combine the
orbit Zarr stores found in `indir` into a multi-file dataset, filter small
components against the forest mask found in `maskfolder`, save the binary
result to `outpath`, and build pyramids on it.

Returns early (no-op) when `outpath` already exists as a directory, so the
tile is skipped on resume. Side effects: creates the Zarr store at `outpath`
and writes pyramid levels into it.
"""
function postprocess(tile::AbstractString, indir, outpath, maskfolder)
if isdir(outpath)
return
end

# Exactly one forest-mask file is expected per tile; `only` throws otherwise.
forpath = only(glob("*$tile*", maskfolder))
@show forpath
allfiles = readdir(indir, join=true)
@show allfiles
orbitfiles = filter(x->occursin(tile,string(x)), allfiles)
@show orbitfiles
orbits = filter(x->occursin(".zarr", string(x)), orbitfiles)
@show orbits
# The relative orbit is encoded as the 4th underscore-separated field of the
# file name — assumes the naming scheme used by `main` (TODO confirm).
orbitname = map(o->split(basename(o),'_')[4],orbits)
d = DD.format(Dim{:orbit}(orbitname))
files = DD.DimArray(orbits,d)
# Open all orbit stores as one dataset along the :orbit dimension.
ds = open_mfdataset(string.(files))
nx, ny = size(ds.layer)
# Skeleton output: a lazily-materialised all-zero UInt8 layer with fixed
# 256x256 chunks, written structure-only first.
outds_skeleton = Dataset(;defo=YAXArray((ds.X,ds.Y),Fill(UInt8(0),nx,ny),chunks=DAE.GridChunks((nx,ny),(256,256))))
dsout = savedataset(outds_skeleton,path=outpath,skeleton=true,overwrite=true)
forest = Cube(forpath)
#masked = map(*, ds.layer, setchunks(forest,DiskArrays.eachchunk(ds.layer.chunks)))
# Delegate to the array-based method, which fills dsout.defo in place.
@time postprocess(ds.layer,dsout.defo, forest)
@time PyramidScheme.buildpyramids(outpath)
end

# Open input data
#=
p = "/Net/Groups/BGI/work_5/scratch/fgans/germany_2020/"
outpath = "./output.zarr/"
orbits = readdir(p)
orbitname = map(o->split(basename(o),'_')[4],orbits)
d = DD.format(Dim{:orbit}(orbitname))
files = DD.DimArray(orbits,d)
ds = open_mfdataset(files)
#Prepare output dataset to write into, this might also be a view into an existing dataset
nx,ny = size(ds.layer)
outds_skeleton = Dataset(;defo=YAXArray((ds.X,ds.Y),Fill(UInt8(0),nx,ny),chunks=DAE.GridChunks((nx,ny),(256,256))))
dsout = savedataset(outds_skeleton,path=outpath,skeleton=true,overwrite=true)

#Call the postprocess function
postprocess(ds.layer,dsout.defo)

=#
98 changes: 78 additions & 20 deletions src/main.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
using ArgParse
using YAXArrays: YAXDefaults
using ArchGDAL: ArchGDAL
using PyramidScheme
using AWSS3: global_aws_config, S3Path
using FilePathsBase: exists


#using YAXArrays, Zarr
using Minio: MinioConfig

global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1"))

const argparsesettings = ArgParseSettings()

ArgParse.parse_item(::Type{Date}, x::AbstractString) = Date(x)
Expand Down Expand Up @@ -69,72 +78,121 @@ function main(;
tiles::Vector{String},
continent::String,
indir::String,
outdir="out.zarr",
outstore=Zarr.S3Store("europe-forest-change"),
outdir="results",
tempfolder = S3Path(outstore.bucket, "intermediates/"),
start_date::Date,
end_date::Date,
polarisation="VH",
orbit="D",
threshold=3.0,
folders=["V1M0R1", "V1M1R1", "V1M1R2"],
stack=:dae
stack=:dae,
postprocess=true,
forestdir="data/forest20m_new",
delete_intermediate=false
)
#global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1",username="ufew8gJku5hRY7VD6jbEjRi8VnvDfeEv",password="dqZdzWCLB7a9gTshL29AnQWGqL3krwnS"))
in(orbit, ["A", "D"]) || error("Orbit needs to be either A or D")
if isdir(indir) && isempty(indir)
error("Input directory $indir must not be empty")
end
if isdir(outdir)

if isdir(tempfolder)
@warn "Resume from existing output directory"
else
mkdir(outdir)
mkdir(tempfolder, recursive=true)
@info "Write output to $outdir"
end

if monthday(start_date) != monthday(end_date)
@warn "Selected time series does not include a multiple of whole years. This might introduce seasonal bias."
end

YAXDefaults.workdir[] = outdir
YAXDefaults.workdir[] = tempfolder
@show typeof(tempfolder)

corruptedfiles = "corrupted_tiles.txt"
corruptedfiles = open("corrupted_tiles.txt", "w")
# TODO save the corrupt files to a txt for investigation
for tilefolder in tiles
filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders]
@show tilefolder
outpath = joinpath(outdir, "postprocess_$tilefolder.zarr/")
@show outpath
if outpath in Zarr.subdirs(outstore, outdir)
println("Skip already processed tile $tilefolder")
continue
end
sub = first(folders)
#@show glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir)
filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders]
allfilenames = collect(Iterators.flatten(filenamelist))

#@show allfilenames
relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames])
@show relorbits

for relorbit in relorbits
path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)"))
#s3path = "s3://"*joinpath(outstore.bucket, path)
@show path
exists(path * ".done") && continue
exists(path * "_zerotimesteps.done") && continue
filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)]
@time cube = gdalcube(filenames, stack)


path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)")
@show path
ispath(path * ".done") && continue
ispath(path * "_zerotimesteps.done") && continue

tcube = cube[Time=start_date .. end_date]
@show size(cube)
@show size(tcube)
if size(tcube, 3) == 0
touch(path * "_zerotimesteps.done")
touch(S3Path(path * "_zerotimesteps.done"))
continue
end
try
outpath = path * ".zarr"
@time rqatrend(tcube; thresh=threshold, outpath=outpath, overwrite=true)
Zarr.consolidate_metadata(outpath)
orbitoutpath = string(path * ".zarr/")
# This is only necessary because overwrite=true doesn't work on S3 based Zarr files in YAXArrays
# See https://github.com/JuliaDataCubes/YAXArrays.jl/issues/511
if exists(S3Path(orbitoutpath))
println("Deleting path $orbitoutpath")
rm(S3Path(orbitoutpath), recursive=true)
end
@show orbitoutpath
# This seems to ignore the overwrite keyword when the outpath point to S3.
@time rqatrend(tcube; thresh=threshold, outpath=orbitoutpath, overwrite=true)
if delete_intermediate == false
PyramidScheme.buildpyramids(orbitoutpath)
Zarr.consolidate_metadata(orbitoutpath)
end
catch e

if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError
println("Found GDALError:")
println(e.captured.ex.msg)
println(corruptedfiles, "Found GDALError:")
println(corruptedfiles, e.captured.ex.msg)
continue
else
rethrow(e)
end
end
touch(path * ".done")
donepath = path * ".done"
@show donepath
touch(S3Path(path * ".done"))
end
if postprocess
@show outpath
DD.STRICT_BROADCAST_CHECKS[] = false

RQADeforestation.postprocess(tilefolder, tempfolder, outpath, forestdir)
Zarr.consolidate_metadata(outpath)
DD.STRICT_BROADCAST_CHECKS[] = true
#base = basename(outpath)
#@show base
#command = `aws --endpoint-url http://s3.fairsendd.eodchosting.eu s3 cp --recursive $outpath s3://europe-forest-change/$base`

#run(command)
end
if delete_intermediate
rm(tempfolder, force=true, recursive=true)
end


end
end
3 changes: 2 additions & 1 deletion src/rqatrend.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ end

Compute the RQA trend metric for the data that is available on `path`.
"""
rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...)
"""
    rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...)

Convenience method: open the datacube stored at `path` and forward it,
together with all keyword arguments, to the `Cube`-based `rqatrend` method.
"""
function rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...)
    return rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...)
end


"""
Expand Down
Loading