Skip to content

Commit e26c3a1

Browse files
authored
Merge pull request #14 from PharmCat/dev
DStat struct
2 parents e8e672d + ad1f538 commit e26c3a1

File tree

4 files changed

+119
-79
lines changed

4 files changed

+119
-79
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "MetidaStats"
22
uuid = "75cdad26-409a-4e43-8ad7-d54b4fa665a0"
33
authors = ["PharmCat <v.s.arnautov@yandex.ru>"]
4-
version = "0.2.2"
4+
version = "0.3.0"
55

66
[deps]
77

src/descriptive.jl

Lines changed: 115 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,30 @@ const STATLIST = [:n,
2929
:sek,
3030
:sum]
3131

32+
"""
    DStat(sym, name, corrected, level)
    DStat(s::Symbol; corrected = nothing, level = nothing)
    DStat(s::String; corrected = nothing, level = nothing)
    DStat(p::Pair; corrected = nothing, level = nothing)
    DStat(ds::DStat)

Description of one requested descriptive statistic.

# Fields
- `sym`: symbol identifying the statistic (compared against `:n`, `:mean`, ... when
  statistics are computed).
- `name`: label stored with the statistic; defaults to the string form of the symbol,
  or the second element when a `Pair` is given.
- `corrected`: per-statistic override passed through `correctedval`; `nothing` means
  "use the global setting".
- `level`: per-statistic level passed through `qval`/`levelval`; `nothing` means
  "use the global setting".

Passing an existing `DStat` returns it unchanged, so a collection that mixes symbols,
strings, pairs and `DStat` values can be normalized with `map(DStat, v)`.
"""
struct DStat
    sym::Symbol
    name
    corrected
    level
end

function DStat(s::Symbol; corrected = nothing, level = nothing)
    return DStat(s, string(s), corrected, level)
end

function DStat(p::Pair{Symbol, <:Any}; corrected = nothing, level = nothing)
    return DStat(p[1], p[2], corrected, level)
end

function DStat(s::String; corrected = nothing, level = nothing)
    return DStat(Symbol(s), s, corrected, level)
end

function DStat(p::Pair{String, <:Any}; corrected = nothing, level = nothing)
    return DStat(Symbol(p[1]), p[2], corrected, level)
end

# Identity method: the original returned the undefined name `d` (UndefVarError).
DStat(ds::DStat) = ds
44+
"""
    make_stats(v::AbstractVector)
    make_stats(x)

Normalize a statistics specification into a vector of `DStat`.

A vector is converted element by element with the `DStat` constructors; any other
single value is converted and wrapped in a one-element vector.
"""
function make_stats(v::AbstractVector)
    return map(DStat, v)
end

function make_stats(x)
    return [DStat(x)]
end
46+
"""
    statssymbs(v::AbstractVector)

Return the `sym` field of every element of `v`, in order.
"""
function statssymbs(v::AbstractVector)
    return map(v) do stat
        stat.sym
    end
end
47+
"""
    correctedval(a, b)

Resolve the `corrected` flag for one statistic: when the per-statistic value `a`
is `nothing` the fallback `b` is returned, when `a` is a `Bool` it wins.
"""
function correctedval(::Nothing, fallback)
    return fallback
end

function correctedval(own::Bool, fallback)
    return own
end
49+
"""
    qval(level, q, n_)

Return the quantile multiplier to use for one statistic.

- `level === nothing`: the precomputed value `q` is returned unchanged.
- `level::AbstractFloat`: the `1 - (1 - level) / 2` quantile of `TDist(n_ - 1)` is
  computed; `NaN` is returned when `n_ <= 1` or `level` lies outside `[0, 1]`.
"""
qval(level::Nothing, q, n_) = q

function qval(level::AbstractFloat, q, n_)
    # The original range check referenced an undefined `a`; the value being
    # validated is `level`.
    if n_ > 1 && 0 <= level <= 1
        return quantile(TDist(n_ - 1), 1 - (1 - level) / 2)
    end
    return NaN
end
54+
"""
    levelval(level, l)

Pick the level to report for one statistic: the fallback `l` when the per-statistic
`level` is `nothing`, otherwise the per-statistic floating-point `level` itself.
"""
function levelval(::Nothing, fallback)
    return fallback
end

function levelval(own::AbstractFloat, fallback)
    return own
end
3256
"""
3357
dataimport(data; vars, sort = nothing)
3458
@@ -176,21 +200,22 @@ function descriptives(data::DataSet{T}; kwargs...) where T <: ObsData
176200

177201

178202
if !(:stats in k)
179-
kwargs[:stats] = [:n, :mean, :sd, :se, :median, :min, :max]
203+
stats = [:n, :mean, :sd, :se, :median, :min, :max]
204+
kwargs[:stats] = make_stats(stats)
180205
else
181-
if isa(kwargs[:stats], Symbol) kwargs[:stats] = [kwargs[:stats]] end
182-
if isa(kwargs[:stats], String) kwargs[:stats] = [Symbol(kwargs[:stats])] end
206+
kwargs[:stats] = make_stats(kwargs[:stats])
207+
stats = statssymbs(kwargs[:stats])
183208
end
184209

185-
kwargs[:stats] ⊆ STATLIST || error("Some statistics not known!")
210+
stats ⊆ STATLIST || error("Some statistics not known!")
186211

187-
if any(x -> x in [:geom, :geomean, :logmean, :logvar, :geocv], kwargs[:stats])
212+
if any(x -> x in [:geom, :geomean, :logmean, :logvar, :geocv], stats)
188213
makelogvec = true
189214
else
190215
makelogvec = false
191216
end
192217

193-
if any(x -> x in [:lci, :uci, :lmeanci, :umeanci], kwargs[:stats])
218+
if any(x -> x in [:lci, :uci, :lmeanci, :umeanci], stats)
194219
cicalk = true
195220
else
196221
cicalk = false
@@ -199,7 +224,7 @@ function descriptives(data::DataSet{T}; kwargs...) where T <: ObsData
199224
ds = Vector{Descriptives}(undef, length(data))
200225
i = 1
201226
for d in data
202-
ds[i] = Descriptives(d, descriptives_(d.obs, kwargs, makelogvec, cicalk))
227+
ds[i] = Descriptives(d, kwargs[:stats], descriptives_(d.obs, kwargs, makelogvec, cicalk))
203228
i += 1
204229
end
205230
DataSet(identity.(ds))
@@ -215,7 +240,7 @@ function descriptives_(obsvec, kwargs, logstats, cicalk)
215240
end
216241
n_ = length(vec)
217242
if cicalk
218-
if n_ > 1 q = quantile(TDist(n_ - 1), 1 - (1 - kwargs[:level]) / 2) end # add tdist / normal option # add multiple CI ?
243+
if n_ > 1 && kwargs[:level] >= 0 && kwargs[:level] <= 1 q = quantile(TDist(n_ - 1), 1 - (1 - kwargs[:level]) / 2) else q = NaN end# add tdist / normal option # add multiple CI ?
219244
end
220245
# skipnonpositive
221246
# logstats = makelogvec #calk logstats
@@ -247,96 +272,110 @@ function descriptives_(obsvec, kwargs, logstats, cicalk)
247272

248273
for s in kwargs[:stats]
249274

250-
if s == :n
251-
result[s] = n_
252-
elseif s == :posn
253-
result[s] = logn_
275+
if s.sym == :n
276+
result[s.sym] = n_
277+
elseif s.sym == :posn
278+
result[s.sym] = logn_
254279
elseif !(n_ > 0)
255-
result[s] = NaN
280+
result[s.sym] = NaN
256281
continue
257-
elseif s == :mean
258-
result[s] = sum(vec) / n_
259-
elseif s == :sd
282+
elseif s.sym == :mean
283+
result[s.sym] = sum(vec) / n_
284+
elseif s.sym == :sd
260285
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
261-
result[s] = std(vec; corrected = kwargs[:corrected], mean = result[:mean])
262-
elseif s == :var
286+
corrected = correctedval(s.corrected, kwargs[:corrected])
287+
result[s.sym] = std(vec; corrected = corrected, mean = result[:mean])
288+
elseif s.sym == :var
263289
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
264-
result[s] = var(vec; corrected = kwargs[:corrected], mean = result[:mean])
265-
elseif s == :bvar
290+
corrected = correctedval(s.corrected, kwargs[:corrected])
291+
result[s.sym] = var(vec; corrected = corrected, mean = result[:mean])
292+
elseif s.sym == :bvar
266293
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
267-
result[s] = var(vec; corrected = false, mean = result[:mean])
268-
elseif s == :se
294+
result[s.sym] = var(vec; corrected = false, mean = result[:mean])
295+
elseif s.sym == :se
269296
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
270-
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end
271-
result[s] = result[:sd] / sqrt(n_)
272-
elseif s == :cv
297+
corrected = correctedval(s.corrected, kwargs[:corrected])
298+
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = corrected, mean = result[:mean]) end
299+
result[s.sym] = result[:sd] / sqrt(n_)
300+
elseif s.sym == :cv
273301
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
274-
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end
275-
result[s] = abs(result[:sd] / result[:mean] * 100)
276-
elseif s == :uci
302+
corrected = correctedval(s.corrected, kwargs[:corrected])
303+
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = corrected, mean = result[:mean]) end
304+
result[s.sym] = abs(result[:sd] / result[:mean] * 100)
305+
elseif s.sym == :uci
277306
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
278-
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end
279-
result[s] = result[:mean] + q * result[:sd]
280-
elseif s == :lci
307+
corrected = correctedval(s.corrected, kwargs[:corrected])
308+
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = corrected, mean = result[:mean]) end
309+
qv = qval(s.level, q, n_)
310+
result[Symbol(string(s.sym)*"_$(levelval(s.level, kwargs[:level]))")] = result[:mean] + qv * result[:sd]
311+
elseif s.sym == :lci
281312
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
282-
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end
283-
result[s] = result[:mean] - q * result[:sd]
284-
elseif s == :umeanci
313+
corrected = correctedval(s.corrected, kwargs[:corrected])
314+
qv = qval(s.level, q, n_)
315+
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = corrected, mean = result[:mean]) end
316+
result[Symbol(string(s.sym)*"_$(levelval(s.level, kwargs[:level]))")] = result[:mean] - qv * result[:sd]
317+
elseif s.sym == :umeanci
285318
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
286-
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end
319+
corrected = correctedval(s.corrected, kwargs[:corrected])
320+
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = corrected, mean = result[:mean]) end
287321
haskey(result, :se) || begin result[:se] = result[:sd] / sqrt(n_) end
288-
result[s] = result[:mean] + q * result[:se]
289-
elseif s == :lmeanci
322+
qv = qval(s.level, q, n_)
323+
result[Symbol(string(s.sym)*"_$(levelval(s.level, kwargs[:level]))")] = result[:mean] + qv * result[:se]
324+
elseif s.sym == :lmeanci
290325
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
291-
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end
326+
corrected = correctedval(s.corrected, kwargs[:corrected])
327+
haskey(result, :sd) || begin result[:sd] = std(vec; corrected = corrected, mean = result[:mean]) end
292328
haskey(result, :se) || begin result[:se] = result[:sd] / sqrt(n_) end
293-
result[s] = result[:mean] - q * result[:se]
294-
elseif s == :median
295-
result[s] = median(vec)
296-
elseif s == :min
297-
result[s] = minimum(vec)
298-
elseif s == :max
299-
result[s] = maximum(vec)
300-
elseif s == :q1
301-
result[s] = quantile(vec, 0.25)
302-
elseif s == :q3
303-
result[s] = quantile(vec, 0.75)
304-
elseif s == :iqr
305-
result[s] = abs(quantile(vec, 0.75) - quantile(vec, 0.25))
306-
elseif s == :range
307-
result[s] = abs(maximum(vec) - minimum(vec))
308-
elseif s == :kurt
329+
qv = qval(s.level, q, n_)
330+
result[Symbol(string(s.sym)*"_$(levelval(s.level, kwargs[:level]))")] = result[:mean] - qv * result[:se]
331+
elseif s.sym == :median
332+
result[s.sym] = median(vec)
333+
elseif s.sym == :min
334+
result[s.sym] = minimum(vec)
335+
elseif s.sym == :max
336+
result[s.sym] = maximum(vec)
337+
elseif s.sym == :q1
338+
result[s.sym] = quantile(vec, 0.25)
339+
elseif s.sym == :q3
340+
result[s.sym] = quantile(vec, 0.75)
341+
elseif s.sym == :iqr
342+
result[s.sym] = abs(quantile(vec, 0.75) - quantile(vec, 0.25))
343+
elseif s.sym == :range
344+
result[s.sym] = abs(maximum(vec) - minimum(vec))
345+
elseif s.sym == :kurt
309346
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
310-
result[s] = kurtosis_(vec, result[:mean])
311-
elseif s == :skew
347+
result[s.sym] = kurtosis_(vec, result[:mean])
348+
elseif s.sym == :skew
312349
haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end
313-
result[s] = skewness_(vec, result[:mean])
314-
elseif s == :harmmean
315-
result[s] = harmmean(vec)
316-
elseif s == :ses
317-
result[s] = sesvec(vec)
318-
elseif s == :sek
319-
result[s] = sekvec(vec)
320-
elseif s == :sum
321-
result[s] = sum(vec)
350+
result[s.sym] = skewness_(vec, result[:mean])
351+
elseif s.sym == :harmmean
352+
result[s.sym] = harmmean(vec)
353+
elseif s.sym == :ses
354+
result[s.sym] = sesvec(vec)
355+
elseif s.sym == :sek
356+
result[s.sym] = sekvec(vec)
357+
elseif s.sym == :sum
358+
result[s.sym] = sum(vec)
322359
elseif !logstats
323-
result[s] = NaN
360+
result[s.sym] = NaN
324361
continue
325-
elseif s == :logmean
326-
result[s] = sum(logvec) / logn_
327-
elseif s == :geom || s == :geomean
362+
elseif s.sym == :logmean
363+
result[s.sym] = sum(logvec) / logn_
364+
elseif s.sym == :geom || s.sym == :geomean
328365
haskey(result, :logmean) || begin result[:logmean] = sum(logvec) / logn_ end
329-
result[s] = exp(result[:logmean])
330-
elseif s == :logvar
366+
result[s.sym] = exp(result[:logmean])
367+
elseif s.sym == :logvar
331368
haskey(result, :logmean) || begin result[:logmean] = sum(logvec) / logn_ end
332-
result[s] = var(logvec; corrected = kwargs[:corrected], mean = result[:logmean])
333-
elseif s == :geocv
369+
corrected = correctedval(s.corrected, kwargs[:corrected])
370+
result[s.sym] = var(logvec; corrected = corrected, mean = result[:logmean])
371+
elseif s.sym == :geocv
334372
haskey(result, :logmean) || begin result[:logmean] = sum(logvec) / logn_ end
335-
haskey(result, :logvar) || begin result[:logvar] = var(logvec; corrected = kwargs[:corrected], mean = result[:logmean]) end
336-
result[s] = sqrt(exp(result[:logvar]) - 1)*100
373+
corrected = correctedval(s.corrected, kwargs[:corrected])
374+
haskey(result, :logvar) || begin result[:logvar] = var(logvec; corrected = corrected, mean = result[:logmean]) end
375+
result[s.sym] = sqrt(exp(result[:logvar]) - 1)*100
337376
end
338377
end
339-
filter!(x -> x.first in kwargs[:stats], result)
378+
filter!(x -> x.first in statssymbs(kwargs[:stats]) || occursin("ci", string(x.first)), result)
340379

341380
#if any(:lci in keys(result)) Symbol(string(s)*@sprintf("%g", kwargs[:level]*100))
342381

src/types.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ end
1010

1111
struct Descriptives{T <: ObsData} <: AbstractIDResult{T}
1212
data::T
13+
stats
1314
result::AbstractDict
1415
end
1516

test/runtests.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ io = IOBuffer();
6767
2.98185487195489000
6868
3.1632509429411400]
6969

70-
@test des2[:, :lmeanci] ≈ [ 38.888829025867736
70+
@test des2[:, Symbol("lmeanci_0.95")] ≈ [ 38.888829025867736
7171
-15.09569073847069
7272
49.86853008320322
7373
-6.924357468515037
@@ -76,7 +76,7 @@ io = IOBuffer();
7676
41.32592725057564
7777
-9.544313795179544]
7878

79-
@test des2[:, :umeanci] ≈ [64.7979625770289
79+
@test des2[:, Symbol("umeanci_0.95")] ≈ [64.7979625770289
8080
8.610185372044523
8181
66.25674914892346
8282
10.684430345646309

0 commit comments

Comments
 (0)