# Combine two label arrays in place: every element of `a1` is shifted by
# `mult1 * (a2 - 1)`. The resulting values are unique for each row-wise pair
# of values from `a1` and `a2` when `mult1` is large enough
# (presumably at least the largest label in `a1`, with labels starting at 1).
function _mult!(a1::Array, mult1::Integer, a2::AbstractArray)
    shift(x, y) = x + mult1 * (y - 1)
    return broadcast!(shift, a1, a1, a2)
end
5-
61# A variant of SplitApplyCombine.groupfind using IdDict instead of Dictionaries.Dictionary
72function _groupfind (container)
83 T = keytype (container)
@@ -13,6 +8,21 @@ function _groupfind(container)
138 return inds
149end
1510
# Return `(refs, pool, labeled)` for `col`.
# If `refpool(col)` is not `nothing`, the reference array and pool that come with
# `col` are returned as they are and `labeled` is `true`.
# Otherwise, `col` is labeled with `_label` using `ref_type` for the references
# and `labeled` is `false`.
function _refs_pool(col::AbstractArray, ref_type::Type{<:Integer}=UInt32)
    refs, pool = refarray(col), refpool(col)
    pool === nothing || return refs, pool, true
    # No pool comes with col; the inverse pool returned by _label is not needed
    refs, _, pool = _label(col, eltype(col), ref_type)
    return refs, pool, false
end
20+
# Shift each element of `a1` in place by `mult * (a2 - 1)` and return `a1`.
# The results are unique labels for row-wise pairs of values from `a1` and `a2`
# when `mult` is large enough.
function _mult!(a1::AbstractArray, a2::AbstractArray, mult)
    @. a1 += mult * (a2 - 1)
    return a1
end
25+
1626"""
1727 findcell(cols::VecColumnTable)
1828 findcell(names, data, esample=Colon())
@@ -36,26 +46,15 @@ rather than those for the full `data`.
3646function findcell (cols:: VecColumnTable )
3747 ncol = size (cols, 2 )
3848 isempty (cols) && throw (ArgumentError (" no data column is found" ))
39- col = cols[1 ]
40- refs = refarray (col)
41- pool = refpool (col)
42- labeled = pool != = nothing && eltype (refs) <: Unsigned
43- if ! labeled
44- refs, invpool, pool = _label (col)
45- end
49+ refs, pool, labeled = _refs_pool (cols[1 ])
4650 mult = length (pool)
4751 if ncol > 1
4852 # Make a copy to be used as cache
49- labeled && (refs = collect (refs))
53+ labeled && (refs = copy (refs))
5054 @inbounds for n in 2 : ncol
51- col = cols[n]
52- refsn = refarray (col)
53- pool = refpool (col)
54- if pool === nothing || ! (eltype (refsn) <: Unsigned )
55- refsn, invpool, pool = _label (col)
56- end
55+ refsn, pool, labeled = _refs_pool (cols[n])
5756 multn = length (pool)
58- _mult! (refs, mult, refsn )
57+ _mult! (refs, refsn, mult )
5958 mult = mult * multn
6059 end
6160 end
@@ -108,3 +107,251 @@ function cellrows(cols::VecColumnTable, refrows::IdDict)
108107 end
109108 return cells, rows
110109end
110+
"""
    PanelStructure{R<:Signed, T1, T2<:TimeType}

Structure of panel data determined by the unique combinations
of unit ids and time periods.
It holds the information needed by operations such as
[`lag`](@ref) and [`diff`](@ref).
See also [`setpanel`](@ref).

# Fields
- `refs::Vector{R}`: reference values from which time gaps are obtained by taking differences.
- `invrefs::Dict{R, Int}`: inverse map from each value in `refs` to its index.
- `idpool::Vector{T1}`: unique unit ids.
- `timepool::Vector{T2}`: sorted unique time periods.
- `laginds::Dict{Int, Vector{Int}}`: a map from lag distances to vectors of indices of lagged values.
"""
struct PanelStructure{R<:Signed, T1, T2<:TimeType}
    refs::Vector{R}
    invrefs::Dict{R, Int}
    idpool::Vector{T1}
    timepool::Vector{T2}
    laginds::Dict{Int, Vector{Int}}
    function PanelStructure(refs::Vector, idpool::Vector, timepool::Vector,
            laginds::Dict=Dict{Int, Vector{Int}}())
        R = eltype(refs)
        # Build the inverse map; a later index overwrites an earlier one for equal refs
        invrefs = Dict{R, Int}()
        for (i, ref) in enumerate(refs)
            invrefs[ref] = i
        end
        return new{R, eltype(idpool), eltype(timepool)}(
            refs, invrefs, idpool, timepool, laginds)
    end
end
139+
# Return `(refs, spool)` for `col`, where `spool` is the sorted pool of values
# and `refs` holds, for each element, its distance from the smallest pool value
# measured in units of `step`, plus one.
# If `step` is `nothing`, it is inferred as the minimum gap between adjacent
# values of the sorted pool.
# When the pool has fewer than two values there is no gap to infer from;
# every pool value is then mapped to 1 instead of reducing over an empty collection.
function _scaledrefs_pool(col::AbstractArray, step, ref_type::Type{<:Signed}=Int32)
    refs, pool, labeled = _refs_pool(col, ref_type)
    # refs is overwritten below; copy it when it came with col rather than from labeling
    # (presumably it aliases the storage of col in that case)
    labeled && (refs = copy(refs))
    npool = length(pool)
    spool = sort(pool)
    if step === nothing && npool > 1
        step = minimum(spool[i+1] - spool[i] for i in 1:npool-1)
    end
    # refmap[k] is the scaled reference value for the kth value in the unsorted pool
    refmap = Vector{eltype(refs)}(undef, npool)
    if step === nothing
        # Fewer than two pool values and no step given
        fill!(refmap, one(eltype(refs)))
    elseif npool > 0
        pool1 = spool[1]
        @inbounds for i in 1:npool
            refmap[i] = (pool[i] - pool1) ÷ step + 1
        end
    end
    @inbounds for i in eachindex(refs)
        refs[i] = refmap[refs[i]]
    end
    return refs, spool
end
159+
"""
    setpanel(data, idname, timename, timestep=nothing; ref_type=Int32)
    setpanel(id::AbstractArray, time::AbstractArray, timestep=nothing; ref_type=Int32)

Declare a [`PanelStructure`](@ref) which is required for certain operations
such as [`lag`](@ref) and [`diff`](@ref).
Either a `data` table with `idname` and `timename` for columns representing
unit ids and time periods
or two arrays `id` and `time` representing the two columns are required.
In the former case, `data` must be Tables.jl-compatible.
The element type of the time column must be a subtype of `TimeType`
and the two columns must have the same length.

By default, the time interval `timestep` between two adjacent periods is inferred
based on the minimum gap between two values in the `time` column.
The element type of reference values for [`PanelStructure`](@ref)
can be specified with `ref_type`.

!!! note
    If the underlying data used to create the [`PanelStructure`](@ref) are modified,
    the changes will not be reflected in the existing instances of [`PanelStructure`](@ref).
    A new instance needs to be created with `setpanel`.
"""
function setpanel(id::AbstractArray, time::AbstractArray, timestep=nothing;
        ref_type::Type{<:Signed}=Int32)
    eltype(time) <: TimeType ||
        throw(ArgumentError(" invalid element type $(eltype(time)) from time column"))
    length(id) == length(time) || throw(DimensionMismatch(
        " id has length $(length(id)) while time has length $(length(time)) "))
    # The id references are only read below, so no copy is needed
    refs, idpool, _ = _refs_pool(id)
    trefs, tpool = _scaledrefs_pool(time, timestep, ref_type)
    # The scaled time references can exceed the number of observed periods
    # when there are gaps in time. The spacing between id groups must therefore
    # be based on the largest scaled reference rather than on length(tpool) alone;
    # otherwise references from different ids may collide or fall within reach of a lag.
    tspan = isempty(trefs) ? 0 : Int(maximum(trefs))
    # Multiply 2 to create enough gaps between id groups for the largest possible l
    mult = 2 * max(tspan, length(tpool))
    _mult!(trefs, refs, mult)
    return PanelStructure(trefs, idpool, tpool)
end
195+
# Table-based method: check that `data` is a Tables.jl-compatible table,
# extract the id and time columns by name or index,
# and forward them to the array-based method of `setpanel`.
function setpanel(data, idname::Union{Symbol,Integer}, timename::Union{Symbol,Integer},
        timestep=nothing; ref_type::Type{<:Signed}=Int32)
    if !istable(data)
        throw(ArgumentError(" input data is not Tables.jl-compatible"))
    end
    idcol = getcolumn(data, idname)
    timecol = getcolumn(data, timename)
    return setpanel(idcol, timecol, timestep; ref_type=ref_type)
end
202+
# Compact one-line representation of a PanelStructure
function show(io::IO, panel::PanelStructure)
    print(io, " Panel Structure")
    return nothing
end
204+
# Multi-line representation of a PanelStructure for text/plain display.
# Each field is printed through an IOContext with `:limit` set to `true`
# and `:displaysize` set to `(1, 80)`.
function show(io::IO, ::MIME"text/plain", panel::PanelStructure)
    println(io, " Panel Structure:")
    ctx = IOContext(io, :limit=>true, :displaysize=>(1, 80))
    println(ctx, " idpool: ", panel.idpool)
    println(ctx, " timepool: ", panel.timepool)
    print(ctx, " laginds: ", panel.laginds)
    return nothing
end
211+
"""
    findlag!(panel::PanelStructure, l::Integer=1)

Construct a vector of indices of the `l`th lagged values
for all id-time combinations of `panel`
and save the result in `panel.laginds`.
If a lagged value does not exist, its index is filled with 0.
An `ArgumentError` is thrown unless `abs(l)` is smaller than
the number of time periods in `panel.timepool`.
See also [`ilag!`](@ref).
"""
function findlag!(panel::PanelStructure, l::Integer=1)
    abs(l) < length(panel.timepool) ||
        throw(ArgumentError(" |l| must be smaller than $(length(panel.timepool)) ; got $l "))
    refs = panel.refs
    invrefs = panel.invrefs
    # Convert once so that ref - lagdist stays in the element type of refs
    lagdist = convert(eltype(refs), l)
    inds = Vector{Int}(undef, size(refs))
    @inbounds for i in eachindex(refs)
        # Values of invrefs are Int; an Int default keeps the lookup type-stable
        inds[i] = get(invrefs, refs[i] - lagdist, 0)
    end
    panel.laginds[l] = inds
    return inds
end
237+
"""
    findlead!(panel::PanelStructure, l::Integer=1)

Construct a vector of indices of the `l`th lead values
for all id-time combinations of `panel`
and save the result in `panel.laginds`.
If a lead value does not exist, its index is filled with 0.
A lead of `l` is handled as a lag of `-l` by [`findlag!`](@ref).
See also [`ilead!`](@ref).
"""
function findlead!(panel::PanelStructure, l::Integer=1)
    return findlag!(panel, -l)
end
248+
"""
    ilag!(panel::PanelStructure, l::Integer=1)

Return a vector of indices of the `l`th lagged values
for all id-time combinations of `panel`.
The indices are retrieved from `panel.laginds` if they have been collected before.
Otherwise, they are created by calling [`findlag!`](@ref).
See also [`ilead!`](@ref).
"""
function ilag!(panel::PanelStructure, l::Integer=1)
    # findlag! stores the new indices in panel.laginds by itself
    return get(panel.laginds, l) do
        findlag!(panel, l)
    end
end
262+
"""
    ilead!(panel::PanelStructure, l::Integer=1)

Return a vector of indices of the `l`th lead values
for all id-time combinations of `panel`.
A lead of `l` is handled as a lag of `-l` by [`ilag!`](@ref),
which retrieves the indices from `panel.laginds` if they have been collected before
and creates them otherwise.
See also [`ilag!`](@ref).
"""
function ilead!(panel::PanelStructure, l::Integer=1)
    return ilag!(panel, -l)
end
273+
"""
    lag(panel::PanelStructure, v::AbstractArray, l::Integer=1; default=missing)

Return a vector of `l`th lagged values of `v` with missing values filled with `default`.
The `panel` structure is respected.
A `DimensionMismatch` is thrown if the length of `v` differs from
the number of observations in `panel`.
See also [`ilag!`](@ref) and [`lead`](@ref).
"""
function lag(panel::PanelStructure, v::AbstractArray, l::Integer=1; default=missing)
    if length(v) != length(panel.refs)
        throw(DimensionMismatch(
            " v has length $(length(v)) while expecting $(length(panel.refs)) "))
    end
    inds = ilag!(panel, l)
    # Widen the element type only when the filler is `missing`
    out = if default === missing
        similar(v, Union{eltype(v), Missing})
    else
        similar(v)
    end
    @inbounds for i in 1:length(v)
        k = inds[i]
        # An index of 0 marks a lagged value that does not exist
        out[i] = k == 0 ? default : v[k]
    end
    return out
end
291+
"""
    lead(panel::PanelStructure, v::AbstractArray, l::Integer=1; default=missing)

Return a vector of `l`th lead values of `v` with missing values filled with `default`.
The `panel` structure is respected.
A lead of `l` is handled as a lag of `-l` by [`lag`](@ref).
See also [`ilead!`](@ref) and [`lag`](@ref).
"""
function lead(panel::PanelStructure, v::AbstractArray, l::Integer=1; default=missing)
    return lag(panel, v, -l; default=default)
end
301+
# Fill `dest[i]` with `v[i] - v[inds[i]]` for each `i` in `1:length(v)`.
# Where `inds[i]` is 0, `dest[i]` is set to `default` instead.
# Bounds are not checked; callers must make sure that `dest` and `inds`
# are at least as long as `v`.
function _diff!(dest::AbstractArray, v::AbstractArray, inds::AbstractArray, default)
    @inbounds for i in 1:length(v)
        j = inds[i]
        if j == 0
            dest[i] = default
        else
            dest[i] = v[i] - v[j]
        end
    end
    return nothing
end
307+
"""
    diff!(dest::AbstractArray, panel::PanelStructure, v::AbstractArray; kwargs...)

Take the differences of `v` within observations for each unit in `panel`
and store the result in `dest`.
By default, it calculates the first differences.
A `DimensionMismatch` is thrown if `dest` and `v` differ in length
or if the length of `v` differs from the number of observations in `panel`.
An `ArgumentError` is thrown unless `order` is positive and
smaller than the number of time periods in `panel`.
See also [`diff`](@ref).

# Keywords
- `order::Integer=1`: the order of differences to be taken.
- `l::Integer=1`: the time interval between each pair of observations.
- `default=missing`: default values for indices where the differences do not exist.
"""
function diff!(dest::AbstractArray, panel::PanelStructure, v::AbstractArray;
        order::Integer=1, l::Integer=1, default=missing)
    length(dest) == length(v) || throw(DimensionMismatch(
        " dest has length $(length(dest)) while v has length $(length(v)) "))
    # _diff! indexes the lag indices without bounds checking,
    # so v must match the panel in length
    length(v) == length(panel.refs) || throw(DimensionMismatch(
        "v has length $(length(v)) while expecting $(length(panel.refs))"))
    0 < order < length(panel.timepool) || throw(ArgumentError(
        " order must be between 0 and $(length(panel.timepool)) ; got $order "))
    # Reuse cached lag indices when available
    inds = ilag!(panel, l)
    _diff!(dest, v, inds, default)
    if order > 1
        # Higher orders difference the previous result repeatedly;
        # a cache is needed because _diff! reads lagged entries while writing dest
        cache = similar(dest)
        for _ in 2:order
            copy!(cache, dest)
            _diff!(dest, cache, inds, default)
        end
    end
    return dest
end
339+
"""
    diff(panel::PanelStructure, v::AbstractArray; kwargs...)

Return the differences of `v` within observations for each unit in `panel`.
By default, it calculates the first differences.
The result is allocated here and filled by [`diff!`](@ref).

# Keywords
- `order::Integer=1`: the order of differences to be taken.
- `l::Integer=1`: the time interval between each pair of observations.
- `default=missing`: default values for indices where the differences do not exist.
"""
function diff(panel::PanelStructure, v::AbstractArray;
        order::Integer=1, l::Integer=1, default=missing)
    # Widen the element type only when the filler is `missing`
    out = if default === missing
        similar(v, Union{eltype(v), Missing})
    else
        similar(v)
    end
    return diff!(out, panel, v; order=order, l=l, default=default)
end
0 commit comments