diff --git a/Project.toml b/Project.toml index fe1a238..4f683b0 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ authors = ["Felix Cremer , Daniel Loos occursin(tile,string(x)), allfiles) + orbitfiles = filter(x -> occursin(tile, string(x)), allfiles) @show orbitfiles - orbits = filter(x->occursin(".zarr", string(x)), orbitfiles) + orbits = filter(x -> occursin(".zarr", string(x)), orbitfiles) @show orbits - orbitname = map(o->split(basename(o),'_')[4],orbits) + orbitname = map(o -> split(basename(o), '_')[4], orbits) d = DD.format(Dim{:orbit}(orbitname)) - files = DD.DimArray(orbits,d) + files = DD.DimArray(orbits, d) ds = open_mfdataset(string.(files)) nx, ny = size(ds.layer) - outds_skeleton = Dataset(;defo=YAXArray((ds.X,ds.Y),Fill(UInt8(0),nx,ny),chunks=DAE.GridChunks((nx,ny),(256,256)))) - dsout = savedataset(outds_skeleton,path=outpath,skeleton=true,overwrite=true) + outds_skeleton = Dataset(; + defo=YAXArray( + (ds.X, ds.Y), + Fill(UInt8(0), nx, ny); + chunks=DAE.GridChunks((nx, ny), (256, 256)), + ), + ) + dsout = savedataset(outds_skeleton; path=outpath, skeleton=true, overwrite=true) forest = Cube(forpath) #masked = map(*, ds.layer, setchunks(forest,DiskArrays.eachchunk(ds.layer.chunks))) - @time postprocess(ds.layer,dsout.defo, forest) + @time postprocess(ds.layer, dsout.defo, forest) @time PyramidScheme.buildpyramids(outpath) end diff --git a/src/main.jl b/src/main.jl index c09626c..3d535f6 100644 --- a/src/main.jl +++ b/src/main.jl @@ -1,16 +1,7 @@ using ArgParse using YAXArrays: YAXDefaults -using ArchGDAL: ArchGDAL -using PyramidScheme -using AWSS3: global_aws_config, S3Path -using FilePathsBase: exists -#using YAXArrays, Zarr -using Minio: MinioConfig - -global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1")) - const argparsesettings = ArgParseSettings() ArgParse.parse_item(::Type{Date}, x::AbstractString) = Date(x) @@ -78,121 +69,72 @@ function main(; tiles::Vector{String}, continent::String, indir::String, - outstore=Zarr.S3Store("europe-forest-change"), - outdir="results", - tempfolder = S3Path(outstore.bucket, "intermediates/"), + outdir="out.zarr", start_date::Date, end_date::Date, polarisation="VH", orbit="D", threshold=3.0, folders=["V1M0R1", "V1M1R1", "V1M1R2"], - stack=:dae, - postprocess=true, - forestdir="data/forest20m_new", - delete_intermediate=false + stack=:dae ) -#global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1",username="ufew8gJku5hRY7VD6jbEjRi8VnvDfeEv",password="dqZdzWCLB7a9gTshL29AnQWGqL3krwnS")) in(orbit, ["A", "D"]) || error("Orbit needs to be either A or D") if isdir(indir) && isempty(indir) error("Input directory $indir must not be empty") end - - if isdir(tempfolder) + if isdir(outdir) @warn "Resume from existing output directory" else - mkdir(tempfolder, recursive=true) + mkdir(outdir) @info "Write output to $outdir" end + if monthday(start_date) != monthday(end_date) @warn "Selected time series does not include a multiple of whole years. This might introduce seasonal bias." end - YAXDefaults.workdir[] = tempfolder - @show typeof(tempfolder) + YAXDefaults.workdir[] = outdir - corruptedfiles = open("corrupted_tiles.txt", "w") + corruptedfiles = "corrupted_tiles.txt" # TODO save the corrupt files to a txt for investigation for tilefolder in tiles - @show tilefolder - outpath = joinpath(outdir, "postprocess_$tilefolder.zarr/") - @show outpath - if outpath in Zarr.subdirs(outstore, outdir) - println("Skip already processed tile $tilefolder") - continue - end - sub = first(folders) - #@show glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) - filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders] + filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders] allfilenames = collect(Iterators.flatten(filenamelist)) - #@show allfilenames + relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames]) @show relorbits for relorbit in relorbits - path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)")) - #s3path = "s3://"*joinpath(outstore.bucket, path) - @show path - exists(path * ".done") && continue - exists(path * "_zerotimesteps.done") && continue filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)] @time cube = gdalcube(filenames, stack) - + path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)") + @show path + ispath(path * ".done") && continue + ispath(path * "_zerotimesteps.done") && continue tcube = cube[Time=start_date .. end_date] @show size(cube) @show size(tcube) if size(tcube, 3) == 0 - touch(S3Path(path * "_zerotimesteps.done")) + touch(path * "_zerotimesteps.done") continue end try - orbitoutpath = string(path * ".zarr/") - # This is only necessary because overwrite=true doesn't work on S3 based Zarr files in YAXArrays - # See https://github.com/JuliaDataCubes/YAXArrays.jl/issues/511 - if exists(S3Path(orbitoutpath)) - println("Deleting path $orbitoutpath") - rm(S3Path(orbitoutpath), recursive=true) - end - @show orbitoutpath - # This seems to ignore the overwrite keyword when the outpath point to S3. - @time rqatrend(tcube; thresh=threshold, outpath=orbitoutpath, overwrite=true) - if delete_intermediate == false - PyramidScheme.buildpyramids(orbitoutpath) - Zarr.consolidate_metadata(orbitoutpath) - end + outpath = path * ".zarr" + @time rqatrend(tcube; thresh=threshold, outpath=outpath, overwrite=true) + Zarr.consolidate_metadata(outpath) catch e if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError - println(corruptedfiles, "Found GDALError:") - println(corruptedfiles, e.captured.ex.msg) + println("Found GDALError:") + println(e.captured.ex.msg) continue else rethrow(e) end end - donepath = path * ".done" - @show donepath - touch(S3Path(path * ".done")) - end - if postprocess - @show outpath - DD.STRICT_BROADCAST_CHECKS[] = false - - RQADeforestation.postprocess(tilefolder, tempfolder, outpath, forestdir) - Zarr.consolidate_metadata(outpath) - DD.STRICT_BROADCAST_CHECKS[] = true - #base = basename(outpath) - #@show base - #command = `aws --endpoint-url http://s3.fairsendd.eodchosting.eu s3 cp --recursive $outpath s3://europe-forest-change/$base` - - #run(command) + touch(path * ".done") end - if delete_intermediate - rm(tempfolder, force=true, recursive=true) - end - - end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index b413803..968e1c9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,4 @@ +using LazyArtifacts using RQADeforestation # doctests do not run as testitem as of now, hence it is included here diff --git a/test/testdata.jl b/test/testdata.jl index 2b548a1..078f78a 100644 --- a/test/testdata.jl +++ b/test/testdata.jl @@ -1,6 +1,7 @@ @testitem "testdata main" begin import Pkg: Artifacts.@artifact_str + using LazyArtifacts testdatapath = artifact"rqatestdata/RQADeforestationTestData-2.0" testdir = tempname() @@ -55,4 +56,4 @@ end copy!(ARGS, OLD_ARGS) @test outdir |> readdir |> length > 1 -end \ No newline at end of file +end