Skip to content

Commit 31a80ef

Browse files
authored
Merge pull request #250 from xKDR/as/allow_arbitrary_cols
Allows multiple columns in domain estimation
2 parents 9d69475 + 8a4bf69 commit 31a80ef

File tree

5 files changed

+61
-7
lines changed

5 files changed

+61
-7
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Survey"
22
uuid = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c"
33
authors = ["Ayush Patnaik <ayushpatnaik@gmail.com>"]
4-
version = "0.1.0"
4+
version = "0.2.0"
55

66
[deps]
77
AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"

src/by.jl

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
function bydomain(x::Symbol, domain::Symbol, design::SurveyDesign, func::Function)
1+
function bydomain(x::Symbol, domain, design::SurveyDesign, func::Function)
22
gdf = groupby(design.data, domain)
3-
nd = length(unique(design.data[!, domain]))
43
X = combine(gdf, [x, design.weights] => ((a, b) -> func(a, weights(b))) => :statistic)
54
return X
65
end
76

8-
function bydomain(x::Symbol, domain::Symbol, design::ReplicateDesign, func::Function)
7+
function bydomain(x::Symbol, domain, design::ReplicateDesign, func::Function)
98
gdf = groupby(design.data, domain)
10-
nd = length(unique(design.data[!, domain]))
9+
nd = length(gdf)
1110
X = combine(gdf, [x, design.weights] => ((a, b) -> func(a, weights(b))) => :statistic)
1211
Xt_mat = Array{Float64,2}(undef, (nd, design.replicates))
1312
for i = 1:design.replicates

src/mean.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ julia> mean(:api00, :cname, bclus1)
136136
11 │ Mendocino 623.25 1.09545e-13
137137
```
138138
"""
139-
function mean(x::Symbol, domain::Symbol, design::AbstractSurveyDesign)
139+
function mean(x::Symbol, domain, design::AbstractSurveyDesign)
140140
weighted_mean(x, w) = mean(x, StatsBase.weights(w))
141141
df = bydomain(x, domain, design, weighted_mean)
142142
rename!(df, :statistic => :mean)

src/total.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ julia> total(:api00, :cname, bclus1)
120120
11 │ Mendocino 84380.6 80215.9
121121
```
122122
"""
123-
function total(x::Symbol, domain::Symbol, design::AbstractSurveyDesign)
123+
function total(x::Symbol, domain, design::AbstractSurveyDesign)
124124
df = bydomain(x, domain, design, wsum)
125125
rename!(df, :statistic => :total)
126126
end

test/total.jl

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,4 +168,59 @@ end
168168
# equivalent R code (results cause clutter):
169169
# > svyby(~api00, ~cname, clus1rep, svytotal)
170170
# > svyby(~api00, ~cname, clus1rep, svymean)
171+
172+
# Test multiple domains passed at once
173+
tot = total(:api00, [:stype,:cname], dclus1_boot)
174+
@test filter(row -> row[:cname] == "Los Angeles" && row[:stype] == "E", tot).SE[1] ≈ 343365 rtol = STAT_TOL
175+
@test filter(row -> row[:cname] == "Merced" && row[:stype] == "H", tot).SE[1] ≈ 27090.33 rtol = STAT_TOL
176+
177+
#### Why doesn't this syntax produce domain estimates?
178+
# Test that column specifiers from DataFrames make it through this pipeline
179+
# These tests replicate what you see above...just with a different syntax.
180+
# tot = total(:api00, Survey.DataFrames.Cols(==(:cname)), dclus1_boot)
181+
######## The Survey.DataFrames.Cols(==(:cname)) syntax above doesn't give domain estimates
182+
# @test size(tot)[1] == apiclus1.cname |> unique |> length
183+
# @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 489980.87 rtol = STAT_TOL
184+
# @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 430469.28 rtol = SE_TOL
185+
# @test filter(:cname => ==("San Diego"), tot).total[1] ≈ 1830375.53 rtol = STAT_TOL
186+
# @test filter(:cname => ==("San Diego"), tot).SE[1] ≈ 1298696.64 rtol = SE_TOL
171187
end
188+
189+
#### R code for Vector{Symbol} domain estimation
190+
# > data(api)
191+
# > apiclus1$pw = rep(757/15,nrow(apiclus1))
192+
# > ### 9.04.23
193+
# > dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1);
194+
# > rclus1<-as.svrepdesign(dclus1, type="subbootstrap", compress=FALSE, replicates = 4000)
195+
# > svyby(~api00, ~stype+cname, rclus1, svytotal)
196+
# stype cname api00 se
197+
# E.Alameda E Alameda 273428.40 275423.33
198+
# H.Alameda H Alameda 30683.73 30907.60
199+
# M.Alameda M Alameda 67272.07 67762.88
200+
# E.Fresno E Fresno 48599.40 47484.67
201+
# H.Fresno H Fresno 22356.73 21843.93
202+
# M.Fresno M Fresno 24324.93 23766.99
203+
# E.Kern E Kern 24930.53 24847.76
204+
# M.Kern M Kern 20741.80 20672.93
205+
# E.Los Angeles E Los Angeles 395154.00 341692.92
206+
# M.Los Angeles M Los Angeles 94826.87 95416.42
207+
# E.Mendocino E Mendocino 58844.13 57711.15
208+
# H.Mendocino H Mendocino 35124.80 34448.51
209+
# M.Mendocino M Mendocino 31844.47 31231.33
210+
# E.Merced E Merced 50517.13 51424.65
211+
# H.Merced H Merced 26696.87 27176.47
212+
# M.Merced M Merced 27605.27 28101.18
213+
# E.Orange E Orange 463536.33 465047.76
214+
# M.Orange M Orange 110219.20 110578.59
215+
# E.Plumas E Plumas 144284.20 146672.86
216+
# H.Plumas H Plumas 143729.07 146108.54
217+
# M.Plumas M Plumas 34266.87 34834.16
218+
# E.San Diego E San Diego 1670497.13 1233144.04
219+
# H.San Diego H San Diego 63386.13 63693.54
220+
# M.San Diego M San Diego 96492.27 96960.22
221+
# E.San Joaquin E San Joaquin 848243.73 848605.33
222+
# H.San Joaquin H San Joaquin 79585.93 79619.86
223+
# M.San Joaquin M San Joaquin 101387.53 101430.75
224+
# E.Santa Clara E Santa Clara 737418.93 484164.71
225+
# H.Santa Clara H Santa Clara 35478.07 35311.28
226+
# M.Santa Clara M Santa Clara 187685.53 131278.63

0 commit comments

Comments
 (0)