diff --git a/Project.toml b/Project.toml index 8f2abac..5b65689 100644 --- a/Project.toml +++ b/Project.toml @@ -4,9 +4,11 @@ authors = ["Felix Cremer , Daniel Loos occursin(tile,string(x)), allfiles) + @show orbitfiles + orbits = filter(x->occursin(".zarr", string(x)), orbitfiles) + @show orbits + orbitname = map(o->split(basename(o),'_')[4],orbits) + d = DD.format(Dim{:orbit}(orbitname)) + files = DD.DimArray(orbits,d) + ds = open_mfdataset(string.(files)) + nx, ny = size(ds.layer) + outds_skeleton = Dataset(;defo=YAXArray((ds.X,ds.Y),Fill(UInt8(0),nx,ny),chunks=DAE.GridChunks((nx,ny),(256,256)))) + dsout = savedataset(outds_skeleton,path=outpath,skeleton=true,overwrite=true) + forest = Cube(forpath) + #masked = map(*, ds.layer, setchunks(forest,DiskArrays.eachchunk(ds.layer.chunks))) + @time postprocess(ds.layer,dsout.defo, forest) + @time PyramidScheme.buildpyramids(outpath) +end + +# Open input data +#= +p = "/Net/Groups/BGI/work_5/scratch/fgans/germany_2020/" +outpath = "./output.zarr/" +orbits = readdir(p) +orbitname = map(o->split(basename(o),'_')[4],orbits) +d = DD.format(Dim{:orbit}(orbitname)) +files = DD.DimArray(orbits,d) +ds = open_mfdataset(files) +#Prepare output dataset to write into, this might also be a view into an existing dataset +nx,ny = size(ds.layer) +outds_skeleton = Dataset(;defo=YAXArray((ds.X,ds.Y),Fill(UInt8(0),nx,ny),chunks=DAE.GridChunks((nx,ny),(256,256)))) +dsout = savedataset(outds_skeleton,path=outpath,skeleton=true,overwrite=true) + +#Call the postprocess function +postprocess(ds.layer,dsout.defo) + +=# \ No newline at end of file diff --git a/src/main.jl b/src/main.jl index 3d535f6..c09626c 100644 --- a/src/main.jl +++ b/src/main.jl @@ -1,7 +1,16 @@ using ArgParse using YAXArrays: YAXDefaults +using ArchGDAL: ArchGDAL +using PyramidScheme +using AWSS3: global_aws_config, S3Path +using FilePathsBase: exists +#using YAXArrays, Zarr +using Minio: MinioConfig + +global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1")) + const argparsesettings = ArgParseSettings() ArgParse.parse_item(::Type{Date}, x::AbstractString) = Date(x) @@ -69,72 +78,121 @@ function main(; tiles::Vector{String}, continent::String, indir::String, - outdir="out.zarr", + outstore=Zarr.S3Store("europe-forest-change"), + outdir="results", + tempfolder = S3Path(outstore.bucket, "intermediates/"), start_date::Date, end_date::Date, polarisation="VH", orbit="D", threshold=3.0, folders=["V1M0R1", "V1M1R1", "V1M1R2"], - stack=:dae + stack=:dae, + postprocess=true, + forestdir="data/forest20m_new", + delete_intermediate=false ) +#global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1",username="ufew8gJku5hRY7VD6jbEjRi8VnvDfeEv",password="dqZdzWCLB7a9gTshL29AnQWGqL3krwnS")) in(orbit, ["A", "D"]) || error("Orbit needs to be either A or D") if isdir(indir) && isempty(indir) error("Input directory $indir must not be empty") end - if isdir(outdir) + + if isdir(tempfolder) @warn "Resume from existing output directory" else - mkdir(outdir) + mkdir(tempfolder, recursive=true) @info "Write output to $outdir" end - if monthday(start_date) != monthday(end_date) @warn "Selected time series does not include a multiple of whole years. This might introduce seasonal bias." end - YAXDefaults.workdir[] = outdir + YAXDefaults.workdir[] = tempfolder + @show typeof(tempfolder) - corruptedfiles = "corrupted_tiles.txt" + corruptedfiles = open("corrupted_tiles.txt", "w") # TODO save the corrupt files to a txt for investigation for tilefolder in tiles - filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders] + @show tilefolder + outpath = joinpath(outdir, "postprocess_$tilefolder.zarr/") + @show outpath + if outpath in Zarr.subdirs(outstore, outdir) + println("Skip already processed tile $tilefolder") + continue + end + sub = first(folders) + #@show glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) + filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders] allfilenames = collect(Iterators.flatten(filenamelist)) - + #@show allfilenames relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames]) @show relorbits for relorbit in relorbits + path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)")) + #s3path = "s3://"*joinpath(outstore.bucket, path) + @show path + exists(path * ".done") && continue + exists(path * "_zerotimesteps.done") && continue filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)] @time cube = gdalcube(filenames, stack) + - path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)") - @show path - ispath(path * ".done") && continue - ispath(path * "_zerotimesteps.done") && continue tcube = cube[Time=start_date .. end_date] @show size(cube) @show size(tcube) if size(tcube, 3) == 0 - touch(path * "_zerotimesteps.done") + touch(S3Path(path * "_zerotimesteps.done")) continue end try - outpath = path * ".zarr" - @time rqatrend(tcube; thresh=threshold, outpath=outpath, overwrite=true) - Zarr.consolidate_metadata(outpath) + orbitoutpath = string(path * ".zarr/") + # This is only necessary because overwrite=true doesn't work on S3 based Zarr files in YAXArrays + # See https://github.com/JuliaDataCubes/YAXArrays.jl/issues/511 + if exists(S3Path(orbitoutpath)) + println("Deleting path $orbitoutpath") + rm(S3Path(orbitoutpath), recursive=true) + end + @show orbitoutpath + # This seems to ignore the overwrite keyword when the outpath point to S3. + @time rqatrend(tcube; thresh=threshold, outpath=orbitoutpath, overwrite=true) + if delete_intermediate == false + PyramidScheme.buildpyramids(orbitoutpath) + Zarr.consolidate_metadata(orbitoutpath) + end catch e if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError - println("Found GDALError:") - println(e.captured.ex.msg) + println(corruptedfiles, "Found GDALError:") + println(corruptedfiles, e.captured.ex.msg) continue else rethrow(e) end end - touch(path * ".done") + donepath = path * ".done" + @show donepath + touch(S3Path(path * ".done")) + end + if postprocess + @show outpath + DD.STRICT_BROADCAST_CHECKS[] = false + + RQADeforestation.postprocess(tilefolder, tempfolder, outpath, forestdir) + Zarr.consolidate_metadata(outpath) + DD.STRICT_BROADCAST_CHECKS[] = true + #base = basename(outpath) + #@show base + #command = `aws --endpoint-url http://s3.fairsendd.eodchosting.eu s3 cp --recursive $outpath s3://europe-forest-change/$base` + + #run(command) end + if delete_intermediate + rm(tempfolder, force=true, recursive=true) + end + + end end \ No newline at end of file diff --git a/src/rqatrend.jl b/src/rqatrend.jl index 17967aa..2ef3f2d 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -38,7 +38,8 @@ end Compute the RQA trend metric for the data that is available on `path`. """ -rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...) +rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = + rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...) """