
Commit 2b1ad4f

Felix Cremer (felixcremer) authored
Aggregate different scenes with DAE (#79)
* Aggregate different scenes with DAE. This is currently not using the GDALBufferBand but just plain cat.
* Build concatDiskArray directly without making timestep data into a cube
* Increase DiskArrayEngine version
* Use BufferGDALBand and add stackgroups argument to gdalcube
* Add orbit info to outputpath
* Use grouping index in aggregate_diskarray
* Add lazydiskaggregate to stack data
* Remove wrong input folders
* Set default orbit to "A" in parseargs

---------

Co-authored-by: Felix Cremer <[email protected]>
1 parent e0a6a13 commit 2b1ad4f
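In short: scenes whose acquisition times are less than 200 seconds apart belong to the same satellite pass, and each such group is collapsed into a single time step with mean ∘ skipmissing. A minimal eager sketch of that semantics on toy in-memory arrays (the commit itself does this lazily via DiskArrayEngine or the new LazyAggDiskArray; the data below is made up):

using Dates
using Statistics: mean

times  = DateTime(2020, 1, 1) .+ Second.([0, 30, 400, 430])  # two passes
scenes = [fill(1.0f0, 2, 2), fill(3.0f0, 2, 2), fill(5.0f0, 2, 2), fill(7.0f0, 2, 2)]

# grouptimes(times) from src/auxil.jl would return [[1, 2], [3, 4]] here
groups = [[1, 2], [3, 4]]

# one aggregated 2x2 slice per pass: mean over each group, skipping missing values
stacked = [map(mean ∘ skipmissing, eachslice(cat(scenes[g]...; dims=3); dims=(1, 2))) for g in groups]

The real code keeps the first date of each group as the label of the reduced time axis (dates_grouped in the src/auxil.jl diff below).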

File tree

4 files changed: +104 -14 lines

Project.toml

Lines changed: 2 additions & 2 deletions
@@ -44,8 +44,8 @@ ArchGDAL = "0.10"
 ArgParse = "1"
 ConstructionBase = "1"
 DimensionalData = "0.29"
-DiskArrayEngine = "0.2"
-DiskArrayTools = "0.1.12"
+DiskArrayEngine = "0.1, 0.2"
+DiskArrayTools = "0.1"
 DiskArrays = "0.4"
 Distances = "0.10"
 Distributions = "0.25"

src/analysis.jl

Lines changed: 1 addition & 2 deletions
@@ -1,7 +1,6 @@
-using RecurrenceAnalysis
+using RecurrenceAnalysis: RecurrenceAnalysis as RA
 #using MultipleTesting
 using Distances
-const RA = RecurrenceAnalysis
 """
     countvalid(xout, xin)
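The change above replaces a const module alias with the `as` import form available since Julia 1.6. Both bind RA to the module, but the new form no longer brings all exported names of RecurrenceAnalysis into scope; a small illustration (the RecurrenceMatrix call is only an example of qualified access):

# before: all exports of RecurrenceAnalysis land in the current namespace
using RecurrenceAnalysis
const RA = RecurrenceAnalysis

# after: only the binding RA is introduced
using RecurrenceAnalysis: RecurrenceAnalysis as RA
rmat = RA.RecurrenceMatrix(rand(100), 0.1)  # qualified access still works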

src/auxil.jl

Lines changed: 93 additions & 4 deletions
@@ -1,10 +1,47 @@
 using YAXArrayBase: backendlist, get_var_handle
 using DiskArrayTools
 using DiskArrays: DiskArrays, GridChunks
+using DiskArrayEngine: DiskArrayEngine as DAE
 using DimensionalData: DimensionalData as DD, X, Y
 using GeoFormatTypes
 #using PyramidScheme: PyramidScheme as PS
 using Rasters: Raster
+import DiskArrays: readblock!, IrregularChunks, AbstractDiskArray
+using StatsBase: rle
+using Statistics: mean
+
+#DiskArrays.readblock!(a::AbstractArray,aout,i::AbstractUnitRange...) = copyto!(aout,view(a,i...))
+
+struct LazyAggDiskArray{T,F,A} <: AbstractDiskArray{T,3}
+    f::F
+    arrays::A
+    inds::IrregularChunks
+    s::Tuple{Int,Int,Int}
+end
+function LazyAggDiskArray(f, arrays, groups)
+    allequal(size, arrays) || error("All arrays must have the same size")
+    allequal(eltype, arrays) || error("All arrays must have the same element type")
+    inds = IrregularChunks(; chunksizes=last(rle(groups)))
+    s = (size(first(arrays))..., length(inds))
+    T = Base.promote_op(f, Vector{eltype(first(arrays))})
+    LazyAggDiskArray{T,typeof(f),typeof(arrays)}(f, arrays, inds, s)
+end
+Base.size(a::LazyAggDiskArray) = a.s
+DiskArrays.haschunks(a::LazyAggDiskArray) = DiskArrays.haschunks(first(a.arrays))
+function DiskArrays.readblock!(a::LazyAggDiskArray, aout, i::UnitRange{Int}...)
+    i1, i2, itime = i
+    max_n_array = maximum(it -> length(a.inds[it]), itime)
+    buf = zeros(eltype(first(a.arrays)), length(i1), length(i2), max_n_array)
+    for (j, it) in enumerate(itime)
+        arrays_now = a.arrays[a.inds[it]]
+        for ia in 1:length(arrays_now)
+            DiskArrays.readblock!(arrays_now[ia], view(buf, :, :, ia), i1, i2)
+        end
+        vbuf = view(buf, :, :, 1:length(arrays_now))
+        map!(a.f, view(aout, :, :, j), eachslice(vbuf, dims=(1, 2)))
+    end
+end
+
 
 struct BufferGDALBand{T} <: AG.DiskArrays.AbstractDiskArray{T,2}
     filename::String
@@ -56,7 +93,7 @@ where the difference between neighbouring elements is less than `timediff` milliseconds.
 This returns the indices of the subgroups as a vector of vectors.
 """
 function grouptimes(times, timediff=200000)
-    @assert sort(times) == times
+    @assert issorted(times)
     group = [1]
     groups = [group]

@@ -73,6 +110,24 @@ function grouptimes(times, timediff=200000)
     return groups
 end

+function stackindices(times, timediff=200000)
+    @assert issorted(times)
+    groups = zero(eachindex(times))
+    group = 1
+    groups[1] = group
+
+    for i in 2:length(times)
+        period = times[i] - times[i-1]
+        if period.value < timediff
+            groups[i] = group
+        else
+            group += 1
+            groups[i] = group
+        end
+    end
+    return groups
+end
+
 #=
 function DiskArrays.readblock!(b::GDALBand, aout, r::AbstractUnitRange...)
     if !isa(aout,Matrix)
@@ -93,19 +148,52 @@ function DiskArrays.readblock!(b::GDALBand, aout, r::AbstractUnitRange...)
 end
 =#

-function gdalcube(filenames::AbstractVector{<:AbstractString})
+function gdalcube(filenames::AbstractVector{<:AbstractString}, stackgroups=:dae)
     dates = getdate.(filenames)
+    @show length(dates)
     # Sort the dates and files by DateTime
     p = sortperm(dates)
     sdates = dates[p]
     sfiles = filenames[p]
-    taxis = DD.Ti(sdates)

     #@show sdates
     # Put the dates which are 200 seconds apart into groups
-    #groupinds = grouptimes(sdates, 200000)
+    if stackgroups in [:dae, :lazyagg]
+        groupinds = grouptimes(sdates, 200000)
+        onefile = first(sfiles)
+        gd = backendlist[:gdal]
+        yax1 = gd(onefile)
+        #gdb = yax1["Gray"]
+        #onecube = Cube(onefile)
+        #@show onecube.axes
+        gdb = get_var_handle(yax1, "Gray")
+        gdbband = gdb.band
+        gdbsize = gdb.size
+        gdbattrs = gdb.attrs
+        gdbcs = gdb.cs
+        group_gdbs = map(sfiles) do f
+            BufferGDALBand{eltype(gdb)}(f, gdbband, gdbsize, gdbattrs, gdbcs, Dict{Int,AG.IRasterBand}())
+        end
+
+        cubelist = CFDiskArray.(group_gdbs, (gdbattrs,))
+        stackinds = stackindices(sdates)
+        aggdata = if stackgroups == :dae
+            gcube = diskstack(cubelist)
+            aggdata = DAE.aggregate_diskarray(gcube, mean ∘ skipmissing, (3 => stackinds,); strategy=:direct)
+        else
+            println("Construct lazy diskarray")
+            LazyAggDiskArray(mean ∘ skipmissing, cubelist, stackinds)
+        end
+        # data = DiskArrays.ConcatDiskArray(reshape(groupcubes, (1,1,length(groupcubes))))
+        dates_grouped = [sdates[group[begin]] for group in groupinds]

+        taxis = DD.Ti(dates_grouped)
+        gcube = Cube(sfiles[1])
+        return YAXArray((DD.dims(gcube)[1:2]..., taxis), aggdata, gcube.properties,)
+    else
     #datasets = AG.readraster.(sfiles)
+    taxis = DD.Ti(sdates)
+
     onefile = first(sfiles)
     gd = backendlist[:gdal]
     yax1 = gd(onefile)
@@ -125,6 +213,7 @@ function gdalcube(filenames::AbstractVector{<:AbstractString})
     end
     all_cfs = CFDiskArray(stacked_gdbs, attrs)
     return YAXArray((onecube.axes..., taxis), all_cfs, onecube.properties)
+    end
     #datasetgroups = [datasets[group] for group in groupinds]
     #We have to save the vrts because the usage of nested vrts is not working as a rasterdataset
     #temp = tempdir()
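stackindices is the flat counterpart of grouptimes: instead of a vector of index vectors it returns one group label per scene, and LazyAggDiskArray turns the run lengths of those labels into chunk sizes for its time axis. A small worked example of that pipeline, using only functions from the diff above (the timestamps are made up):

using Dates
using StatsBase: rle

times = DateTime(2024, 1, 1) .+ Second.([0, 30, 60, 400, 430])

grouptimes(times)    # [[1, 2, 3], [4, 5]]  -- gaps under 200_000 ms merge
stackindices(times)  # [1, 1, 1, 2, 2]      -- same grouping, flat labels

# LazyAggDiskArray feeds the run lengths to IrregularChunks, so five input
# scenes become a 2-step time axis whose chunks hold 3 and 2 scenes:
last(rle([1, 1, 1, 2, 2]))  # [3, 2]

On the :dae path the same flat labels are passed to DAE.aggregate_diskarray as a grouping of dimension 3, with mean ∘ skipmissing as the reducer.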

src/main.jl

Lines changed: 8 additions & 6 deletions
@@ -1,6 +1,7 @@
 using ArgParse
 using YAXArrays: YAXDefaults

+
 const argparsesettings = ArgParseSettings()

 ArgParse.parse_item(::Type{Date}, x::AbstractString) = Date(x)
@@ -28,7 +29,7 @@ ArgParse.parse_item(::Type{Date}, x::AbstractString) = Date(x)

     "--orbit", "-o"
         help = "One of: Orbit number, 'A' for ascending, 'D' for descending, '*' for all orbits"
-        default = "*"
+        default = "A"

     "--out-dir", "-d"
         help = "Path to output zarr dataset"
@@ -72,10 +73,12 @@ function main(;
     start_date::Date,
     end_date::Date,
     polarisation="VH",
-    orbit="*",
+    orbit="D",
     threshold=3.0,
-    folders=["V01R01", "V0M2R4", "V1M0R1", "V1M1R1", "V1M1R2"]
+    folders=["V1M0R1", "V1M1R1", "V1M1R2"],
+    stack=:dae
 )
+    in(orbit, ["A", "D"]) || error("Orbit needs to be either A or D")
     if isdir(indir) && isempty(indir)
         error("Input directory $indir must not be empty")
     end
@@ -98,15 +101,14 @@

     filenamelist = [glob("$(sub)/*$(continent)*20M/$(tilefolder)/*$(polarisation)_$(orbit)*.tif", indir) for sub in folders]
     allfilenames = collect(Iterators.flatten(filenamelist))
-    @show allfilenames

     relorbits = unique([split(basename(x), "_")[5][2:end] for x in allfilenames])
     @show relorbits
     for relorbit in relorbits
         filenames = allfilenames[findall(contains("$(relorbit)_E"), allfilenames)]
-        @time cube = gdalcube(filenames)
+        @time cube = gdalcube(filenames, stack)

-        path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(relorbit)_thresh_$(threshold)")
+        path = joinpath(YAXDefaults.workdir[], "$(tilefolder)_rqatrend_$(polarisation)_$(orbit)$(relorbit)_thresh_$(threshold)_year_$(y)")
         @show path
         ispath(path * ".done") && continue
         ispath(path * "_zerotimesteps.done") && continue
