77# ' and uses a Gibbs sampler to impute the individual race probabilities, using
88# ' the model of Imai et al. (2022).
99# '
10+ # ' # Surname Matching
11+ # ' The Census surname table can be inspected with the following code:
12+ # ' ```r
13+ # ' readRDS(system.file("extdata", "names_2010_counts.rds", package="birdie"))
14+ # ' ```
15+ # ' Surnames are processed with [proc_name()] before being matched to the table.
16+ # ' The code also recognizes double-barrelled (hyphenated) surnames and attempts
17+ # ' to match each part if the overall name is not found in the surname table.
18+ # ' Specifying `save_rs=TRUE` will save the matched surname table and a lookup
19+ # ' vector that matches each individual to their surname table row. The overall
20+ # ' match rate is reported as part of the `summary()` output.
21+ # '
1022# ' @param formula A formula specifying the BISG model. Must include the special
1123# ' term `nm()` to identify the surname variable. Certain geographic variables
1224# ' can be identified similarly: `zip()` for ZIP codes, and `state()` for
5163# ' @param save_rgx If `TRUE`, save the `p_rgx` table (matched to each
5264# ' individual) as the `"p_rgx"` and `"gx"` attributes of the output.
5365# ' Necessary for some sensitivity analyses.
66+ # ' @param save_sr If `TRUE`, save the `p_sr` table (surname given race; matched
67+ # ' to each individual as the `"p_sr"` and `"s"` attributes of the output.
5468# '
5569# ' @return An object of class `bisg`, which is just a data frame with some
5670# ' additional attributes. The data frame has rows matching the input data and
8296# ' @concept bisg
8397# ' @export
8498bisg <- function (formula , data = NULL , p_r = p_r_natl(), p_rgx = NULL , p_rs = NULL ,
85- save_rgx = TRUE ) {
99+ save_rgx = TRUE , save_sr = FALSE ) {
86100 vars = parse_bisg_form(formula , data )
87101
88102 l_name = make_name_tbl_vec(vars , p_r , p_rs , FALSE )
@@ -100,6 +114,11 @@ bisg <- function(formula, data=NULL, p_r=p_r_natl(), p_rgx=NULL, p_rs=NULL,
100114 attr(out , " p_rgx" ) = l_gx $ p_rgx
101115 attr(out , " gx" ) = l_gx $ GX
102116 }
117+ if (isTRUE(save_sr )) {
118+ attr(out , " p_rs" ) = l_name $ p_sr
119+ attr(out , " s" ) = l_name $ S
120+ }
121+ attr(out , " unmatched" ) = c(s = l_name $ unmatched , gx = l_gx $ unmatched )
103122 attr(out , " method" ) = " std"
104123
105124 out
@@ -148,6 +167,7 @@ bisg_me <- function(formula, data=NULL, p_r=p_r_natl(), p_rgx=NULL, p_rs=NULL,
148167 attr(out , " S_name" ) = vars $ S_name
149168 attr(out , " GX_names" ) = colnames(vars $ GX )
150169 attr(out , " p_r" ) = l_gx $ p_r
170+ attr(out , " unmatched" ) = c(s = l_name $ unmatched , gx = l_gx $ unmatched )
151171 attr(out , " method" ) = " me"
152172
153173 out
@@ -299,8 +319,11 @@ make_name_tbl_vec <- function(vars, p_r, p_rs, for_me=FALSE) {
299319 p_sr = p_sr [, idx_names ]
300320 }
301321
302- list (S = S ,
303- p_sr = p_sr ) # p_sr is actualy p_rs, unnormalized, if `for_me=TRUE`
322+ list (
323+ S = S ,
324+ p_sr = p_sr , # p_sr is actualy p_rs, unnormalized, if `for_me=TRUE`
325+ unmatched = sum(S == " <generic>" )
326+ )
304327}
305328
306329# Prepare geo/covariate vector and P(G, X | R) table
@@ -433,10 +456,13 @@ make_gx_tbl_vec <- function(vars, p_r, p_rgx) {
433456 p_r = p_r / sum(p_r )
434457 }
435458
436- list (GX = GX_vec ,
437- p_r = p_r ,
438- p_rgx = p_rgx ,
439- p_gxr = p_gxr )
459+ list (
460+ p_r = p_r ,
461+ GX = GX_vec ,
462+ p_rgx = p_rgx ,
463+ p_gxr = p_gxr ,
464+ unmatched = sum(vars $ GX [[1 ]] == " <none>" )
465+ )
440466}
441467
442468# Call Bayes' rule C++
0 commit comments