Skip to content

Commit 4437c82

Browse files
authored
utilizando factor labels (#268)
1 parent bfcd5bb commit 4437c82

File tree

1 file changed

+119
-23
lines changed

1 file changed

+119
-23
lines changed

R/datasus.R

Lines changed: 119 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -283,35 +283,131 @@ load_datasus <- function(dataset,
283283

284284
if( param$dataset == "datasus_sinasc" ) {
285285

286-
dat <- dat %>%
286+
# Lookup table of coded values for the categorical columns handled below.
# One row per (column, code) pair:
#   var_code  - column name in `dat`
#   value     - numeric code as stored in the raw data
#   label_pt  - Portuguese label
#   label_eng - English label
labels <- tibble::tribble(
  ~var_code,    ~value, ~label_pt,                          ~label_eng,
  "origem",     1, "oracle",                           "oracle",
  "origem",     2, "ftp",                              "ftp",
  "origem",     3, "sead",                             "sead",
  "locnasc",    1, "hospital",                         "hospital",
  "locnasc",    2, "outros estabelecimentos de saude", "other health establishments",
  "locnasc",    3, "domicilio",                        "home",
  "locnasc",    4, "outros",                           "other",
  "locnasc",    5, "aldeia indigena",                  "indigenous village",
  "locnasc",    9, "ignorado",                         "unknown",
  "estcivmae",  1, "solteira",                         "single",
  "estcivmae",  2, "casada",                           "married",
  "estcivmae",  3, "viuva",                            "widowed",
  "estcivmae",  4, "divorciada",                       "divorced",
  "estcivmae",  5, "uniao estavel",                    "civil union",
  "estcivmae",  9, "ignorado",                         "unknown",
  # NOTE(review): code 2 reads "1 a 2 anos", which leaves 3 years of schooling
  # uncovered before "4 a 7 anos"; presumably it should be "1 a 3 anos" --
  # confirm against the SINASC data dictionary before changing the label.
  "escmae",     1, "nenhuma",                          "none",
  "escmae",     2, "1 a 2 anos",                       "1 to 2 years",
  "escmae",     3, "4 a 7 anos",                       "4 to 7 years",
  "escmae",     4, "8 a 11 anos",                      "8 to 11 years",
  "escmae",     5, "12 e mais",                        "12 or more years",
  "escmae",     9, "ignorado",                         "unknown",
  "semagestac", 1, "menos de 22 semanas",              "less than 22 weeks",
  "semagestac", 2, "22 a 27 semanas",                  "22 to 27 weeks",
  "semagestac", 3, "28 a 31 semanas",                  "28 to 31 weeks",
  "semagestac", 4, "32 a 36 semanas",                  "32 to 36 weeks",
  "semagestac", 5, "37 a 41 semanas",                  "37 to 41 weeks",
  "semagestac", 6, "42 semanas e mais",                "42 weeks or more",
  "semagestac", 9, "ignorado",                         "unknown",
  "gravidez",   1, "unica",                            "single",
  "gravidez",   2, "dupla",                            "twin",
  "gravidez",   3, "tripla ou mais",                   "triplet or more",
  "gravidez",   9, "ignorado",                         "unknown",
  "parto",      1, "vaginal",                          "vaginal",
  "parto",      2, "cesario",                          "cesarean",
  "parto",      9, "ignorado",                         "unknown",
  "consprenat", 1, "nenhuma",                          "none",
  "consprenat", 2, "de 1 a 3",                         "1 to 3",
  "consprenat", 3, "de 4 a 6",                         "4 to 6",
  "consprenat", 4, "7 e mais",                         "7 or more",
  "consprenat", 9, "ignorado",                         "unknown",
  "sexo",       0, "ignorado",                         "unknown",
  "sexo",       1, "masculino",                        "male",
  "sexo",       2, "feminino",                         "female",
  "racacor",    1, "branca",                           "white",
  "racacor",    2, "preta",                            "black",
  "racacor",    3, "amarela",                          "yellow",
  "racacor",    4, "parda",                            "brown",
  "racacor",    5, "indigena",                         "indigenous",
  # Fixed: labels were shifted relative to their codes (1 = ignorado, 2 = sim,
  # 9 = nao). The mapping is 1 = sim, 2 = nao, 9 = ignorado.
  "idanomal",   1, "sim",                              "yes",
  "idanomal",   2, "nao",                              "no",
  "idanomal",   9, "ignorado",                         "unknown",
  "escmae2010", 0, "sem escolaridade",                 "no education",
  "escmae2010", 1, "fundamental 1",                    "elementary 1",
  "escmae2010", 2, "fundamental 2",                    "elementary 2",
  "escmae2010", 3, "medio",                            "high school",
  "escmae2010", 4, "superior incompleto",              "incomplete higher education",
  "escmae2010", 5, "superior completo",                "complete higher education",
  "escmae2010", 9, "ignorado",                         "unknown",
  "racacormae", 1, "branca",                           "white",
  "racacormae", 2, "preta",                            "black",
  "racacormae", 3, "amarela",                          "yellow",
  "racacormae", 4, "parda",                            "brown",
  "racacormae", 5, "indigena",                         "indigenous",
  "tpmetestim", 1, "exame fisico",                     "physical exam",
  "tpmetestim", 2, "outro metodo",                     "other method",
  "tpmetestim", 9, "ignorado",                         "unknown",
  "tpapresent", 1, "cefalica",                         "cephalic",
  "tpapresent", 2, "pelvica ou podalica",              "breech or footling",
  "tpapresent", 3, "transversa",                       "transverse",
  "tpapresent", 9, "ignorado",                         "unknown",
  "sttrabpart", 1, "sim",                              "yes",
  "sttrabpart", 2, "nao",                              "no",
  "sttrabpart", 9, "ignorado",                         "unknown",
  "stcesparto", 1, "sim",                              "yes",
  "stcesparto", 2, "nao",                              "no",
  "stcesparto", 3, "nao se aplica",                    "not applicable",
  "stcesparto", 9, "ignorado",                         "unknown",
  "tpnascassi", 1, "medico",                           "doctor",
  "tpnascassi", 2, "enfermeira obstetriz",             "obstetric nurse",
  "tpnascassi", 3, "parteira",                         "midwife",
  "tpnascassi", 4, "outros",                           "other",
  "tpnascassi", 9, "ignorado",                         "unknown"
)
287371

288-
# Documentando as colunas
372+
# adicionando factor labels
373+
374+
dat <- dat %>%
289375
dplyr::mutate(
290-
origem = dplyr::recode(origem, '1' = "oracle", '2' = "ftp", '3' = "sead"),
291-
locnasc = dplyr::recode(locnasc, '1' = "hospital", '2' = "outros estabelecimentos de saude", '3' = "domicilio", '4' = "outros", '5' = "aldeia indigena", '9' = "ignorado"),
292-
estcivmae = dplyr::recode(estcivmae, '1' = "solteira", '2' = "casada", '3' = "viuva", '4' = "divorciada", '5' = "uniao estavel", '9' = "ignorada"),
293-
escmae = dplyr::recode(escmae, '1' = "nenhuma", '2' = "1 a 2 anos", '3' = "4 a 7 anos", '4' = "8 a 11 anos", '5' = "12 e mais", '9' = "ignorado"),
294-
semagestac = dplyr::recode(semagestac, '1' = "menos de 22 semanas", '2' = "22 a 27 semanas", '3' = "28 a 31 semanas", '4' = "32 a 36 semanas", '5' = "37 a 41 semanas", '6' = "42 semanas e mais", '9' = "ignorado"),
295-
gravidez = dplyr::recode(gravidez, '1' = "unica", '2' = "dupla", '3' = "tripla ou mais", '9' = "ignorado"),
296-
parto = dplyr::recode(parto, '1' = "vaginal", '2' = "cesario", '9' = "ignorado"),
297-
consprenat = dplyr::recode(consprenat, '1' = "nenhuma", '2' = "de 1 a 3", '3' = "de 4 a 6", '4' = "7 e mais", '9' = "ignorado"),
298-
sexo = dplyr::recode(sexo, '0' = "ignorado", '1' = "masculino", '2' = "feminino"),
299-
racacor = dplyr::recode(racacor, '1' = "branca", '2' = "preta", '3' = "amarela", '4' = "parda", '5' = "indigena"),
300-
idanomal = dplyr::recode(idanomal, '9' = "ignorado", '1' = "sim", '2' = "nao"),
301-
escmae2010 = dplyr::recode(escmae2010, '0' = "sem escolaridade", '1' = "fundamental 1", '2' = "fundamental 2", '3' = "medio", '4' = "superior incompleto", '5' = "superior completo", '9' = "ignorado"),
302-
dtnascmae = lubridate::dmy(as.character(dtnascmae)),
303-
racacormae = dplyr::recode(racacormae, '1' = "branca", '2' = "preta", '3' = "amarela", '4' = "parda", '5' = "indigena"),
304-
dtultmenst = lubridate::dmy(as.character(dtultmenst)),
305-
tpmetestim = dplyr::recode(tpmetestim, '1' = "exame fisico", '2' = "outro metodo", '9' = "ignorado"),
306-
tpapresent = dplyr::recode(tpapresent, '1' = "cefalica", '2' = "pelvica ou podalica", '3' = "transversa", '9' = "ignorado"),
307-
sttrabpart = dplyr::recode(sttrabpart, '1' = "sim", '2' = "nao", '9' = "ignorado"),
308-
stcesparto = dplyr::recode(stcesparto, '1' = "sim", '2' = "nao", '3' = "nao se aplica", '9' = "ignorado"),
309-
tpnascassi = dplyr::recode(tpnascassi, '1' = "medico", '2' = "enfermeira obstetriz", '3' = "parteira", '4' = "outros", '9' = "ignorado")
376+
# Convert every coded column that has entries in `labels` (and is present in
# the data) into a factor, with labels in the language given by
# `param$language`.
dplyr::across(
  dplyr::any_of(unique(labels$var_code)),
  function(x) {
    # dictionary rows that belong to the column currently being processed
    col_dic <- dplyr::filter(labels, var_code == dplyr::cur_column())

    # label vector: Portuguese when requested, English otherwise
    col_labels <- if (param$language == "pt") {
      col_dic$label_pt
    } else {
      col_dic$label_eng
    }

    # codes become the factor levels, relabelled with the chosen language
    factor(x, levels = col_dic$value, labels = col_labels)
  }
)
310399
)
311400

401+
# formatando dados
402+
312403
dat <- dat %>%
313-
dplyr::mutate(codmunnasc = as.numeric(as.character(codmunnasc))) %>%
404+
dplyr::mutate(
405+
dtnascmae = lubridate::dmy(as.character(dtnascmae)),
406+
dtultmenst = lubridate::dmy(as.character(dtultmenst)),
407+
codmunnasc = as.numeric(as.character(codmunnasc))
408+
) %>%
314409
dplyr::rename("code_muni_6" = "codmunnasc")
410+
315411
}
316412

317413
if (!(param$dataset %in% c("datasus_sih"))) {

0 commit comments

Comments
 (0)