Skip to content

Commit c30421f

Browse files
authored
Tidy up code and move code for later analyses into separate files (#98)
* Remove duplicated tau_recurrence function from analysis * Add tests for getdate and countvalid * Move forest data handling related code into separate file. * Move netcdfify to forestdata * Remove agcube function This function should be superseded by the LazyAggDiskArray approach, because it can't deal with the aggregation of the data.
1 parent b63c875 commit c30421f

File tree

4 files changed

+131
-140
lines changed

4 files changed

+131
-140
lines changed

src/analysis.jl

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using RecurrenceAnalysis: RecurrenceAnalysis as RA
2-
#using MultipleTesting
32
using Distances
3+
import RecurrenceAnalysis: tau_recurrence
4+
45
"""
56
countvalid(xout, xin)
67
@@ -11,22 +12,42 @@ function countvalid(xout, xin)
1112
xout .= count(!ismissing, xin)
1213
end
1314

14-
"""
15-
countvalidag(xout, xin)
16-
17-
Inner function to count the valid time steps in a datacube.
18-
This function is aimed to be used inside of a mapCube call.
19-
"""
20-
function countvalidint(xout, xin)
21-
xout .= count(x -> x != -9999, xin)
22-
end
2315
"""
2416
countvalid(cube)
2517
2618
Outer function to count the number of valid time steps in a cube.
2719
"""
28-
countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=InDims("Time"), outdims=OutDims(; path))
20+
countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=InDims("Time", filter=YAXArrays.DAT.NoFilter()), outdims=OutDims(; path))
21+
22+
@testitem "countvalid cube" begin
23+
using RQADeforestation
24+
using YAXArrays
25+
using Dates
26+
using DimensionalData: Ti, X, Y
27+
using Statistics
28+
import Random
29+
using Missings
30+
Random.seed!(1234)
2931

32+
mock_axes = (
33+
Ti(Date("2022-01-01"):Day(1):Date("2022-01-30")),
34+
X(range(1, 10, length=10)),
35+
Y(range(1, 5, length=15)),
36+
)
37+
mock_data = allowmissing(rand(30, 10, 15))
38+
mock_data[1:10,1,1] .= missing
39+
mock_data[:, 2,1] .= missing
40+
mock_data[[1,5,9], 2,2] .= missing
41+
mock_props = Dict()
42+
mock_cube = YAXArray(mock_axes, mock_data, mock_props)
43+
44+
mock_count = RQADeforestation.countvalid(mock_cube)
45+
@test mock_count.axes == (mock_cube.X, mock_cube.Y)
46+
@test mock_count[1,1] == 20
47+
@test mock_count[1,2] == 30
48+
@test mock_count[2,2] == 27
49+
@test mock_count[2,1] == 0
50+
end
3051

3152
"""
3253
rqatrend(xout, xin, thresh)
@@ -35,9 +56,7 @@ Compute the RQA trend metric for the non-missing time steps of xin, and save it
3556
`thresh` specifies the epsilon threshold of the Recurrence Plot computation
3657
"""
3758
function rqatrend_recurrenceanalysis(pix_trend, pix, thresh=2)
38-
#replace!(pix, -9999 => missing)
3959
ts = collect(skipmissing(pix))
40-
#@show length(ts)
4160
tau_pix = tau_recurrence(ts, thresh)
4261
pix_trend .= RA._trend(tau_pix)
4362
end
@@ -49,6 +68,7 @@ function rqatrend_matrix(pix_trend, pix, thresh=2)
4968
pix_trend .= RA.trend(rm)
5069
end
5170

71+
#=
5272
"""
5373
rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300)
5474
Compute the RQA trend metric for shuffled time series of the data cube `cube` with the epsilon threshold `thresh` for `numshuffle` tries and save it into `path`.
@@ -58,25 +78,9 @@ function rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=
5878
# TODO this looks completely broken
5979
sg = surrogenerator(collect(eachindex(water[overlap])), BlockShuffle(7, shift=true))
6080
end
81+
=#
6182

6283

63-
import RecurrenceAnalysis: tau_recurrence
64-
65-
function tau_recurrence(ts::AbstractVector, thresh, metric=Euclidean())
66-
n = length(ts)
67-
rr_τ = zeros(n)
68-
for col in 1:n
69-
for row in 1:(col-1)
70-
d = evaluate(metric, ts[col], ts[row])
71-
#@show row, col, d
72-
rr_τ[col-row+1] += d <= thresh
73-
end
74-
end
75-
rr_τ[1] = n
76-
rr_τ ./ (n:-1:1)
77-
#rr_τ
78-
end
79-
8084
"""
8185
anti_diagonal_density(ts, thresh, metric)
8286
Compute the average density of the diagonals perpendicular to the main diagonal for data series `ts`.
@@ -110,6 +114,4 @@ function inner_postprocessing(rqadata, forestmask; threshold=-1.28, clustersize=
110114
x
111115
end
112116
end
113-
#@time labeldata = MultipleTesting.label_components(rqathresh,trues(3,3))
114-
#@time clusterdata = MultipleTesting.maskcluster(rqathresh, labeldata, clustersize)
115117
end

src/auxil.jl

Lines changed: 11 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ using DiskArrays: DiskArrays, GridChunks
44
using DiskArrayEngine: DiskArrayEngine as DAE
55
using DimensionalData: DimensionalData as DD, X, Y
66
using GeoFormatTypes
7-
#using PyramidScheme: PyramidScheme as PS
87
using Rasters: Raster
98
import DiskArrays: readblock!, IrregularChunks, AbstractDiskArray
109
using StatsBase: rle
@@ -70,8 +69,15 @@ function DiskArrays.readblock!(b::BufferGDALBand, aout, r::AbstractUnitRange...)
7069
end
7170

7271
function getdate(x, reg=r"[0-9]{8}T[0-9]{6}", df=dateformat"yyyymmddTHHMMSS")
73-
m = match(reg, x).match
74-
date = DateTime(m, df)
72+
m = match(reg, x)
73+
isnothing(m) && throw(ArgumentError("Did not find a datetime information in $x"))
74+
date = DateTime(m.match, df)
75+
end
76+
77+
@testitem "getdate" begin
78+
using Dates
79+
@test RQADeforestation.getdate("sometext20200919T202020_somemoretext1234") == DateTime(2020,9,19, 20,20,20)
80+
@test_throws Exception RQADeforestation.getdate("sometext")
7581
end
7682

7783
"""
@@ -110,6 +116,7 @@ function grouptimes(times, timediff=200000)
110116
return groups
111117
end
112118

119+
113120
function stackindices(times, timediff=200000)
114121
@assert issorted(times)
115122
groups = zero(eachindex(times))
@@ -234,32 +241,6 @@ end
234241
end
235242

236243

237-
"""
238-
agcube(filenames)
239-
Open the underlying tiff files via ArchGDAL.
240-
This opens all files and keeps them open.
241-
This has a higher upfront cost, but might lead to a speedup down the line when we access the data.
242-
"""
243-
function agcube(filenames::AbstractVector{<:AbstractString})
244-
dates = RQADeforestation.getdate.(filenames)
245-
# Sort the dates and files by DateTime
246-
p = sortperm(dates)
247-
sdates = dates[p]
248-
sfiles = filenames[p]
249-
taxis = Ti(sdates)
250-
datasets = AG.readraster.(sfiles)
251-
yaxlist = YAXArray.(datasets)
252-
return concatenatecubes(yaxlist, taxis)
253-
end
254-
255-
function netcdfify(path)
256-
c = Cube(path)
257-
npath = splitext(path)[1] * ".nc"
258-
fl32 = map(x -> ismissing(x) ? x : Float32(x), c)
259-
fl32.properties["_FillValue"] *= 1.0f0
260-
savecube(fl32, npath; compress=5)
261-
end
262-
263244
const equi7crs = Dict(
264245
"AF" => ProjString("+proj=aeqd +lat_0=8.5 +lon_0=21.5 +x_0=5621452.01998 +y_0=5990638.42298 +datum=WGS84 +units=m +no_defs"),
265246
"AN" => ProjString("+proj=aeqd +lat_0=-90 +lon_0=0 +x_0=3714266.97719 +y_0=3402016.50625 +datum=WGS84 +units=m +no_defs"),
@@ -268,77 +249,4 @@ const equi7crs = Dict(
268249
"NA" => ProjString("+proj=aeqd +lat_0=52 +lon_0=-97.5 +x_0=8264722.17686 +y_0=4867518.35323 +datum=WGS84 +units=m +no_defs"),
269250
"OC" => ProjString("+proj=aeqd +lat_0=-19.5 +lon_0=131.5 +x_0=6988408.5356 +y_0=7654884.53733 +datum=WGS84 +units=m +no_defs"),
270251
"SA" => ProjString("+proj=aeqd +lat_0=-14 +lon_0=-60.5 +x_0=7257179.23559 +y_0=5592024.44605 +datum=WGS84 +units=m +no_defs")
271-
)
272-
273-
# Auxillary functions for masking with the forest data
274-
275-
function getsubtiles(tile)
276-
east = eastint(tile)
277-
north = northint(tile)
278-
tiles = ["E$(lpad(e,3,"0"))N$(lpad(n, 3, "0"))T1" for e in east:(east+2), n in north:(north+2)]
279-
return tiles
280-
end
281-
282-
eastint(tile) = parse(Int, tile[2:4])
283-
northint(tile) = parse(Int, tile[6:8])
284-
285-
286-
287-
function aggregate_forestry(tile)
288-
subtiles = getsubtiles(tile)
289-
foresttiles = [(parse.(Int, match(r"E(\d\d\d)N(\d\d\d)T1", t).captures)...,) => "/eodc/private/pangeojulia/ForestType/2017_FOREST-CLASSES_EU010M_$(t).tif" for t in subtiles]
290-
filledtiles = filter(x -> isfile(last(x)), foresttiles)
291-
if isempty(filledtiles)
292-
return nothing
293-
end
294-
295-
296-
idx_to_fname = Dict(filledtiles...)
297-
a = Cube(last(first(filledtiles)))
298-
east = eastint(tile)
299-
north = northint(tile)
300-
f = ChunkedFillArray(a[1, 1], size(a), size.(DiskArrays.eachchunk(a)[1], 1))
301-
allarrs = [haskey(idx_to_fname, (x, y)) ? Cube(idx_to_fname[(x, y)]).data : f for x in east:(east+2), y in north:(north+2)]
302-
303-
yaxs = Cube.(last.(filledtiles))
304-
#ext = Extents.union(yaxs...)
305-
#tilex = Rasters._mosaic(first.(dims.(yaxs))...)
306-
#tiley = Rasters._mosaic(last.(dims.(yaxs))...)
307-
diskarray_merged = DiskArrayTools.ConcatDiskArray(allarrs)
308-
309-
# We should first do the pyramid computation and then stitch non values along
310-
311-
312-
#foryax = Cube.(filledtiles)
313-
#forest = YAXArrays.Datasets.open_mfdataset(vec(foresttiles))
314-
315-
aggfor = [PS.gen_output(Union{Int8,Missing}, ceil.(Int, size(c) ./ 2)) for c in yaxs]
316-
#a = aggfor[1]
317-
#yax = foryax[1]
318-
#PS.fill_pyramids(yax, a, x->sum(x) >0,true)
319-
println("Start aggregating")
320-
@time [PS.fill_pyramids(yaxs[i].data, aggfor[i], x -> count(!iszero, x) == 4 ? true : missing, true) for i in eachindex(yaxs)]
321-
#tilepath = joinpath(indir, tile * suffix)
322-
#aggyax = [Raster(aggfor[i][1][:,:,1], (PS.agg_axis(dims(yax,X), 2), PS.agg_axis(dims(yax, Y), 2))) for (i, yax) in enumerate(foryax)]
323-
#ras = Raster(tilepath)
324-
#allagg = ConcatDiskArray(only.(aggfor)[:,[3,2,1]])
325-
326-
#allagg = ConcatDiskArray(aggfor[:,[3,2,1]])
327-
#allagg = ConcatDiskArray(only.(aggfor))
328-
forras = Raster.(foresttiles, lazy=true)
329-
xaxs = DD.dims.(forras[:, 1], X)
330-
xaxsnew = [xax[begin:2:end] for xax in xaxs]
331-
xax = vcat(xaxsnew...)
332-
yaxs = DD.dims.(forras[1, :], Y)
333-
yaxsnew = [yax[begin:2:end] for yax in yaxs]
334-
yax = vcat(reverse(yaxsnew)...)
335-
YAXArray((xax, yax), allagg[:, :, 1])
336-
end
337-
338-
function maskforests(tilepath, outdir=".")
339-
tile = match(r"E\d\d\dN\d\d\dT3", tilepath).match
340-
forras = aggregate_forestry(tile)
341-
ras = Raster(tilepath)
342-
mras = forras .* ras
343-
write(joinpath(outdir, "forestmasked_all" * tile * suffix), mras)
344-
end
252+
)

src/forestdata.jl

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
2+
# Auxillary functions for masking with the forest data
3+
#=
4+
function getsubtiles(tile)
5+
east = eastint(tile)
6+
north = northint(tile)
7+
tiles = ["E$(lpad(e,3,"0"))N$(lpad(n, 3, "0"))T1" for e in east:(east+2), n in north:(north+2)]
8+
return tiles
9+
end
10+
11+
eastint(tile) = parse(Int, tile[2:4])
12+
northint(tile) = parse(Int, tile[6:8])
13+
14+
15+
16+
function aggregate_forestry(tile)
17+
subtiles = getsubtiles(tile)
18+
foresttiles = [(parse.(Int, match(r"E(\d\d\d)N(\d\d\d)T1", t).captures)...,) => "/eodc/private/pangeojulia/ForestType/2017_FOREST-CLASSES_EU010M_$(t).tif" for t in subtiles]
19+
filledtiles = filter(x -> isfile(last(x)), foresttiles)
20+
if isempty(filledtiles)
21+
return nothing
22+
end
23+
24+
25+
idx_to_fname = Dict(filledtiles...)
26+
a = Cube(last(first(filledtiles)))
27+
east = eastint(tile)
28+
north = northint(tile)
29+
f = ChunkedFillArray(a[1, 1], size(a), size.(DiskArrays.eachchunk(a)[1], 1))
30+
allarrs = [haskey(idx_to_fname, (x, y)) ? Cube(idx_to_fname[(x, y)]).data : f for x in east:(east+2), y in north:(north+2)]
31+
32+
yaxs = Cube.(last.(filledtiles))
33+
#ext = Extents.union(yaxs...)
34+
#tilex = Rasters._mosaic(first.(dims.(yaxs))...)
35+
#tiley = Rasters._mosaic(last.(dims.(yaxs))...)
36+
diskarray_merged = DiskArrayTools.ConcatDiskArray(allarrs)
37+
38+
# We should first do the pyramid computation and then stitch non values along
39+
40+
41+
#foryax = Cube.(filledtiles)
42+
#forest = YAXArrays.Datasets.open_mfdataset(vec(foresttiles))
43+
44+
aggfor = [PS.gen_output(Union{Int8,Missing}, ceil.(Int, size(c) ./ 2)) for c in yaxs]
45+
#a = aggfor[1]
46+
#yax = foryax[1]
47+
#PS.fill_pyramids(yax, a, x->sum(x) >0,true)
48+
println("Start aggregating")
49+
@time [PS.fill_pyramids(yaxs[i].data, aggfor[i], x -> count(!iszero, x) == 4 ? true : missing, true) for i in eachindex(yaxs)]
50+
#tilepath = joinpath(indir, tile * suffix)
51+
#aggyax = [Raster(aggfor[i][1][:,:,1], (PS.agg_axis(dims(yax,X), 2), PS.agg_axis(dims(yax, Y), 2))) for (i, yax) in enumerate(foryax)]
52+
#ras = Raster(tilepath)
53+
#allagg = ConcatDiskArray(only.(aggfor)[:,[3,2,1]])
54+
55+
#allagg = ConcatDiskArray(aggfor[:,[3,2,1]])
56+
#allagg = ConcatDiskArray(only.(aggfor))
57+
forras = Raster.(foresttiles, lazy=true)
58+
xaxs = DD.dims.(forras[:, 1], X)
59+
xaxsnew = [xax[begin:2:end] for xax in xaxs]
60+
xax = vcat(xaxsnew...)
61+
yaxs = DD.dims.(forras[1, :], Y)
62+
yaxsnew = [yax[begin:2:end] for yax in yaxs]
63+
yax = vcat(reverse(yaxsnew)...)
64+
YAXArray((xax, yax), allagg[:, :, 1])
65+
end
66+
67+
function maskforests(tilepath, outdir=".")
68+
tile = match(r"E\d\d\dN\d\d\dT3", tilepath).match
69+
forras = aggregate_forestry(tile)
70+
ras = Raster(tilepath)
71+
mras = forras .* ras
72+
write(joinpath(outdir, "forestmasked_all" * tile * suffix), mras)
73+
end
74+
75+
"""
76+
netcdfify(path)
77+
Convert the raster data at `path` to netcdf by converting the data to Float32 and resaving.
78+
"""
79+
function netcdfify(path)
80+
c = Cube(path)
81+
npath = splitext(path)[1] * ".nc"
82+
fl32 = map(x -> ismissing(x) ? x : Float32(x), c)
83+
fl32.properties["_FillValue"] *= 1.0f0
84+
savecube(fl32, npath; compress=5)
85+
end
86+
=#

src/main.jl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,6 @@ function main(;
134134
rethrow(e)
135135
end
136136
end
137-
#=@everywhere begin
138-
fname = "$(VERSION)_$(getpid())_$(time_ns()).heapsnapshot"
139-
Profile.take_heap_snapshot(fname;streaming=true)
140-
end
141-
=#
142137
touch(path * ".done")
143138
end
144139
end

0 commit comments

Comments
 (0)