Skip to content

Commit d0863a2

Browse files
danlooo and Felix Cremer authored
Fix tests (#120)
* Remove timestats
* Remove PyramidScheme
* Use LazyArtifacts
* Add LazyArtifacts import to testdata.jl
* Fix ambiguity

---------

Co-authored-by: Felix Cremer <[email protected]>
1 parent 52498c1 commit d0863a2

File tree

6 files changed

+81
-121
lines changed

6 files changed

+81
-121
lines changed

Project.toml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ authors = ["Felix Cremer <[email protected]>, Daniel Loos <dloos@bgc-
44
version = "1.0.0-DEV"
55

66
[deps]
7-
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
87
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
98
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
109
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
@@ -25,14 +24,13 @@ GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f"
2524
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
2625
ImageMorphology = "787d08f9-d448-5407-9aad-5290dd7ab264"
2726
KML = "1284bf3a-1e3d-4f4e-a7a9-b9d235a28f35"
27+
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
2828
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
2929
LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
30-
Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20"
3130
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
3231
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
3332
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
3433
Proj = "c94c279d-25a6-4763-9509-64d165bea63e"
35-
PyramidScheme = "ec211b67-1c2c-4319-878f-eaee078ee145"
3634
Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689"
3735
RecurrenceAnalysis = "639c3291-70d9-5ea2-8c5b-839eba1ee399"
3836
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
@@ -46,7 +44,6 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c"
4644
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
4745

4846
[compat]
49-
AWSS3 = "0.10.4"
5047
AllocCheck = "0.2.2"
5148
Aqua = "0.8.13"
5249
ArchGDAL = "0.10"
@@ -71,10 +68,10 @@ GeoFormatTypes = "0.4"
7168
Glob = "1"
7269
ImageMorphology = "0.4.5"
7370
KML = "0.2"
71+
LazyArtifacts = "1.11.0"
7472
Libdl = "1.10"
7573
LinearAlgebra = "1.10"
7674
LoggingExtras = "1"
77-
Minio = "0.2.2"
7875
Missings = "1"
7976
NetCDF = "0.12"
8077
Pkg = "1.10"
@@ -93,7 +90,7 @@ TestItems = "1.0"
9390
TimeseriesSurrogates = "2"
9491
UnicodePlots = "3"
9592
YAXArrayBase = "0.6, 0.7"
96-
YAXArrays = "0.5, 0.6"
93+
YAXArrays = "0.5, 0.6, 0.7"
9794
Zarr = "0.9"
9895
julia = "1.11"
9996

@@ -102,13 +99,13 @@ AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
10299
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
103100
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
104101
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
102+
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
105103
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
106104
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
107105
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
108106
StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a"
109107
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
110108
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
111-
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
112109

113110
[targets]
114111
test = ["Test", "TestItemRunner", "Pkg", "Random", "AllocCheck", "BenchmarkTools", "Aqua", "Documenter", "StaticTools", "PythonCall", "Libdl"]

src/RQADeforestation.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ using Zarr
88
using Distributed: myid
99
using NetCDF
1010
using TestItems
11-
using PyramidScheme: PyramidScheme
1211
using DimensionalData: DimensionalData as DD
1312

1413
export gdalcube, rqatrend
@@ -18,7 +17,6 @@ include("auxil.jl")
1817
include("rqatrend.jl")
1918
include("analysis.jl") # TODO what is still needed from analysis now that rqatrend is in its own file?
2019
include("cluster.jl")
21-
include("timestats.jl")
2220
include("main.jl")
2321

2422
end

src/cluster.jl

Lines changed: 54 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -7,78 +7,99 @@ using FillArrays
77
#maskfolder = "data/forestcompressed"
88

99
function meanvote(orbits, significance_thresh=-1.28)
10-
s,n = 0.0,0
10+
s, n = 0.0, 0
1111
for i in eachindex(orbits)
1212
if orbits[i] != 0 && !isnan(orbits[i])
1313
s += orbits[i]
1414
n += 1
1515
end
1616
end
17-
m = s/n
18-
m < significance_thresh ? 1 : 0
17+
m = s / n
18+
return m < significance_thresh ? 1 : 0
1919
end
2020

21-
function filtersmallcomps!(xout,xin_unfiltered,forestmask,comborbits,connsize;dims=:,threaded=false)
22-
xin = broadcast(xin_unfiltered,forestmask) do x,m
23-
ismissing(m) ? zero(x) : x*m
21+
function filtersmallcomps!(
22+
xout, xin_unfiltered, forestmask, comborbits, connsize; dims=:, threaded=false
23+
)
24+
xin = broadcast(xin_unfiltered, forestmask) do x, m
25+
ismissing(m) ? zero(x) : x * m
2426
end
25-
x = similar(Array{Float64},(axes(xin,1),axes(xin,2),Base.OneTo(1)))
26-
for j in axes(x,2), i in axes(x,1)
27-
x[i,j,1] = comborbits(view(xin,i,j,:))
27+
x = similar(Array{Float64}, (axes(xin, 1), axes(xin, 2), Base.OneTo(1)))
28+
for j in axes(x, 2), i in axes(x, 1)
29+
x[i, j, 1] = comborbits(view(xin, i, j, :))
2830
end
2931
lc = label_components(x)
3032
c = counter(lc)
3133
for ix in eachindex(xout)
3234
v = lc[ix]
33-
if v==0 || c[v] < connsize
35+
if v == 0 || c[v] < connsize
3436
xout[ix] = 0
3537
else
3638
xout[ix] = 1
3739
end
3840
end
3941
end
4042

41-
function postprocess(a,target_array::YAXArray,forestmask::YAXArray, orbitcombine=meanvote;minsize=30,max_cache=5e8)
42-
nx,ny,nz = size(a)
43-
windowsx = DAE.MovingWindow(1 - minsize,1,2*minsize + 1,nx,(1,nx))
44-
windowsy = DAE.MovingWindow(1 - minsize,1,2*minsize + 1,ny,(1,ny))
43+
function postprocess(
44+
a::YAXArray,
45+
target_array::YAXArray,
46+
forestmask::YAXArray,
47+
orbitcombine=meanvote;
48+
minsize=30,
49+
max_cache=5e8,
50+
)
51+
nx, ny, nz = size(a)
52+
windowsx = DAE.MovingWindow(1 - minsize, 1, 2 * minsize + 1, nx, (1, nx))
53+
windowsy = DAE.MovingWindow(1 - minsize, 1, 2 * minsize + 1, ny, (1, ny))
4554
windowsz = [1:nz]
46-
inar1 = DAE.InputArray(a.data,windows=(windowsx,windowsy,windowsz));
47-
inar2 = DAE.InputArray(forestmask.data, windows=(windowsx,windowsy));
48-
inars = (inar1,inar2)
49-
outchunks = (target_array.chunks.chunks...,DAE.RegularChunks(1,0,1))
50-
outars = DAE.create_outwindows((nx,ny,1),chunks=outchunks);
51-
uf = DAE.create_userfunction(filtersmallcomps!,UInt8,is_blockfunction=true,is_mutating=true,args=(orbitcombine,minsize))
52-
op = DAE.GMDWop(inars,(outars,),uf)
53-
plan = DAE.optimize_loopranges(op,max_cache)
54-
runner=DAE.LocalRunner(op,plan,(reshape(target_array.data,(nx,ny,1)),))
55+
inar1 = DAE.InputArray(a.data; windows=(windowsx, windowsy, windowsz))
56+
inar2 = DAE.InputArray(forestmask.data; windows=(windowsx, windowsy))
57+
inars = (inar1, inar2)
58+
outchunks = (target_array.chunks.chunks..., DAE.RegularChunks(1, 0, 1))
59+
outars = DAE.create_outwindows((nx, ny, 1); chunks=outchunks)
60+
uf = DAE.create_userfunction(
61+
filtersmallcomps!,
62+
UInt8;
63+
is_blockfunction=true,
64+
is_mutating=true,
65+
args=(orbitcombine, minsize),
66+
)
67+
op = DAE.GMDWop(inars, (outars,), uf)
68+
plan = DAE.optimize_loopranges(op, max_cache)
69+
runner = DAE.LocalRunner(op, plan, (reshape(target_array.data, (nx, ny, 1)),))
5570
run(runner)
56-
target_array
71+
return target_array
5772
end
5873

5974
function postprocess(tile::AbstractString, indir, outpath, maskfolder)
6075
if isdir(outpath)
61-
return
76+
return nothing
6277
end
6378

6479
forpath = only(glob("*$tile*", maskfolder))
6580
@show forpath
66-
allfiles = readdir(indir, join=true)
81+
allfiles = readdir(indir; join=true)
6782
@show allfiles
68-
orbitfiles = filter(x->occursin(tile,string(x)), allfiles)
83+
orbitfiles = filter(x -> occursin(tile, string(x)), allfiles)
6984
@show orbitfiles
70-
orbits = filter(x->occursin(".zarr", string(x)), orbitfiles)
85+
orbits = filter(x -> occursin(".zarr", string(x)), orbitfiles)
7186
@show orbits
72-
orbitname = map(o->split(basename(o),'_')[4],orbits)
87+
orbitname = map(o -> split(basename(o), '_')[4], orbits)
7388
d = DD.format(Dim{:orbit}(orbitname))
74-
files = DD.DimArray(orbits,d)
89+
files = DD.DimArray(orbits, d)
7590
ds = open_mfdataset(string.(files))
7691
nx, ny = size(ds.layer)
77-
outds_skeleton = Dataset(;defo=YAXArray((ds.X,ds.Y),Fill(UInt8(0),nx,ny),chunks=DAE.GridChunks((nx,ny),(256,256))))
78-
dsout = savedataset(outds_skeleton,path=outpath,skeleton=true,overwrite=true)
92+
outds_skeleton = Dataset(;
93+
defo=YAXArray(
94+
(ds.X, ds.Y),
95+
Fill(UInt8(0), nx, ny);
96+
chunks=DAE.GridChunks((nx, ny), (256, 256)),
97+
),
98+
)
99+
dsout = savedataset(outds_skeleton; path=outpath, skeleton=true, overwrite=true)
79100
forest = Cube(forpath)
80101
#masked = map(*, ds.layer, setchunks(forest,DiskArrays.eachchunk(ds.layer.chunks)))
81-
@time postprocess(ds.layer,dsout.defo, forest)
102+
@time postprocess(ds.layer, dsout.defo, forest)
82103
@time PyramidScheme.buildpyramids(outpath)
83104
end
84105

src/main.jl

Lines changed: 20 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,7 @@
11
using ArgParse
22
using YAXArrays: YAXDefaults
3-
using ArchGDAL: ArchGDAL
4-
using PyramidScheme
5-
using AWSS3: global_aws_config, S3Path
6-
using FilePathsBase: exists
73

84

9-
#using YAXArrays, Zarr
10-
using Minio: MinioConfig
11-
12-
global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1"))
13-
145
const argparsesettings = ArgParseSettings()
156

167
ArgParse.parse_item(::Type{Date}, x::AbstractString) = Date(x)
@@ -78,121 +69,72 @@ function main(;
7869
tiles::Vector{String},
7970
continent::String,
8071
indir::String,
81-
outstore=Zarr.S3Store("europe-forest-change"),
82-
outdir="results",
83-
tempfolder = S3Path(outstore.bucket, "intermediates/"),
72+
outdir="out.zarr",
8473
start_date::Date,
8574
end_date::Date,
8675
polarisation="VH",
8776
orbit="D",
8877
threshold=3.0,
8978
folders=["V1M0R1", "V1M1R1", "V1M1R2"],
90-
stack=:dae,
91-
postprocess=true,
92-
forestdir="data/forest20m_new",
93-
delete_intermediate=false
79+
stack=:dae
9480
)
95-
#global_aws_config(MinioConfig("http://s3.fairsendd.eodchosting.eu",region="us-east-1",username="ufew8gJku5hRY7VD6jbEjRi8VnvDfeEv",password="dqZdzWCLB7a9gTshL29AnQWGqL3krwnS"))
9681
in(orbit, ["A", "D"]) || error("Orbit needs to be either A or D")
9782
if isdir(indir) && isempty(indir)
9883
error("Input directory $indir must not be empty")
9984
end
100-
101-
if isdir(tempfolder)
85+
if isdir(outdir)
10286
@warn "Resume from existing output directory"
10387
else
104-
mkdir(tempfolder, recursive=true)
88+
mkdir(outdir)
10589
@info "Write output to $outdir"
10690
end
91+
10792
if monthday(start_date) != monthday(end_date)
10893
@warn "Selected time series does not include a multiple of whole years. This might introduce seasonal bias."
10994
end
11095

111-
YAXDefaults.workdir[] = tempfolder
112-
@show typeof(tempfolder)
96+
YAXDefaults.workdir[] = outdir
11397

114-
corruptedfiles = open("corrupted_tiles.txt", "w")
98+
corruptedfiles = "corrupted_tiles.txt"
11599
# TODO save the corrupt files to a txt for investigation
116100
for tilefolder in tiles
117-
@show tilefolder
118-
outpath = joinpath(outdir, "postprocess_$tilefolder.zarr/")
119-
@show outpath
120-
if outpath in Zarr.subdirs(outstore, outdir)
121-
println("Skip already processed tile $tilefolder")
122-
continue
123-
end
124-
sub = first(folders)
125-
#@show glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir)
126-
filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)*/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders]
101+
filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders]
127102
allfilenames = collect(Iterators.flatten(filenamelist))
128-
#@show allfilenames
103+
129104
relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames])
130105
@show relorbits
131106

132107
for relorbit in relorbits
133-
path = S3Path(joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)"))
134-
#s3path = "s3://"*joinpath(outstore.bucket, path)
135-
@show path
136-
exists(path * ".done") && continue
137-
exists(path * "_zerotimesteps.done") && continue
138108
filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)]
139109
@time cube = gdalcube(filenames, stack)
140-
141110

111+
path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)")
112+
@show path
113+
ispath(path * ".done") && continue
114+
ispath(path * "_zerotimesteps.done") && continue
142115

143116
tcube = cube[Time=start_date .. end_date]
144117
@show size(cube)
145118
@show size(tcube)
146119
if size(tcube, 3) == 0
147-
touch(S3Path(path * "_zerotimesteps.done"))
120+
touch(path * "_zerotimesteps.done")
148121
continue
149122
end
150123
try
151-
orbitoutpath = string(path * ".zarr/")
152-
# This is only necessary because overwrite=true doesn't work on S3 based Zarr files in YAXArrays
153-
# See https://github.com/JuliaDataCubes/YAXArrays.jl/issues/511
154-
if exists(S3Path(orbitoutpath))
155-
println("Deleting path $orbitoutpath")
156-
rm(S3Path(orbitoutpath), recursive=true)
157-
end
158-
@show orbitoutpath
159-
# This seems to ignore the overwrite keyword when the outpath point to S3.
160-
@time rqatrend(tcube; thresh=threshold, outpath=orbitoutpath, overwrite=true)
161-
if delete_intermediate == false
162-
PyramidScheme.buildpyramids(orbitoutpath)
163-
Zarr.consolidate_metadata(orbitoutpath)
164-
end
124+
outpath = path * ".zarr"
125+
@time rqatrend(tcube; thresh=threshold, outpath=outpath, overwrite=true)
126+
Zarr.consolidate_metadata(outpath)
165127
catch e
166128

167129
if hasproperty(e, :captured) && e.captured.ex isa ArchGDAL.GDAL.GDALError
168-
println(corruptedfiles, "Found GDALError:")
169-
println(corruptedfiles, e.captured.ex.msg)
130+
println("Found GDALError:")
131+
println(e.captured.ex.msg)
170132
continue
171133
else
172134
rethrow(e)
173135
end
174136
end
175-
donepath = path * ".done"
176-
@show donepath
177-
touch(S3Path(path * ".done"))
178-
end
179-
if postprocess
180-
@show outpath
181-
DD.STRICT_BROADCAST_CHECKS[] = false
182-
183-
RQADeforestation.postprocess(tilefolder, tempfolder, outpath, forestdir)
184-
Zarr.consolidate_metadata(outpath)
185-
DD.STRICT_BROADCAST_CHECKS[] = true
186-
#base = basename(outpath)
187-
#@show base
188-
#command = `aws --endpoint-url http://s3.fairsendd.eodchosting.eu s3 cp --recursive $outpath s3://europe-forest-change/$base`
189-
190-
#run(command)
137+
touch(path * ".done")
191138
end
192-
if delete_intermediate
193-
rm(tempfolder, force=true, recursive=true)
194-
end
195-
196-
197139
end
198140
end

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using LazyArtifacts
12
using RQADeforestation
23

34
# doctests do not run as testitem as of now, hence it is included here

test/testdata.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
@testitem "testdata main" begin
33
import Pkg: Artifacts.@artifact_str
4+
using LazyArtifacts
45
testdatapath = artifact"rqatestdata/RQADeforestationTestData-2.0"
56

67
testdir = tempname()
@@ -55,4 +56,4 @@ end
5556
copy!(ARGS, OLD_ARGS)
5657

5758
@test outdir |> readdir |> length > 1
58-
end
59+
end

0 commit comments

Comments
 (0)