Skip to content

Commit 47c1d3e

Browse files
Open up factor support in nafill (#7539)
* Open up factor support * a test of possibly valid factor filling * Reflect coerceAs update of levels in output
1 parent d273fbd commit 47c1d3e

File tree

4 files changed

+43
-5
lines changed

4 files changed

+43
-5
lines changed

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
### NEW FEATURES
1616

17-
1. `nafill()`, `setnafill()` extended to work on logical vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR.
17+
1. `nafill()`, `setnafill()` extended to work on logical and factor vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR.
1818

1919
2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; `TRUE` uses the default 3-second interval, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR.
2020

inst/tests/nafill.Rraw

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,30 @@ test(12.08, nafill(x, fill=NA_integer_), x)
333333
test(12.09, nafill(x, fill=NA_real_), x)
334334
test(12.10, nafill(x, fill=NaN), x)
335335

336+
## factor input
337+
x = rep(NA_character_, 10L)
338+
x[c(3:4, 7:8)] = c("a", "b", "a", "c")
339+
x = as.factor(x)
340+
test(13.01, nafill(x, "locf"), replace(replace(x, 5:6, "b"), 9:10, "c"))
341+
test(13.02, nafill(x, "nocb"), replace(x, c(1:2, 5:6), "a"))
342+
x_fill_a = replace(x, c(1:2, 5:6, 9:10), "a")
343+
test(13.03, nafill(x, fill="a"), x_fill_a)
344+
test(13.04, nafill(x, fill=1L), x_fill_a)
345+
test(13.05, nafill(x, fill=1.0), x_fill_a)
346+
test(13.06, nafill(x, fill=factor("a")), x_fill_a)
347+
test(13.07, nafill(x, fill=factor("a", levels=levels(x))), x_fill_a)
348+
test(13.08, nafill(x, fill=factor("a", levels=c("a", "b"))), x_fill_a)
349+
test(13.09, nafill(x, fill=factor("a", levels=c("a", "d"))), factor(x_fill_a, levels=c("a", "b", "c", "d")))
350+
x_fill_d = replace(factor(x, levels = c(levels(x), "d")), c(1:2, 5:6, 9:10), "d")
351+
test(13.10, nafill(x, fill="d"), x_fill_d)
352+
test(13.11, nafill(x, fill=factor("d", levels=c("a", "b", "c", "d"))), x_fill_d)
353+
test(13.12, nafill(x, fill=factor("d", levels=c("d", "a", "b", "c"))), x_fill_d)
354+
test(13.13, nafill(x, fill=factor("d", levels=c("d", "c", "b", "a"))), x_fill_d)
355+
test(13.14, nafill(x, fill=factor("d", levels=c("b", "c", "d"))), x_fill_d)
356+
test(13.15, nafill(x, fill=NA), x)
357+
test(13.16, nafill(x, fill=NA_integer_), x)
358+
test(13.17, nafill(x, fill=NA_character_), x)
359+
336360
## logical
337361
## character
338362
## factor

man/nafill.Rd

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ x = c(1, NA, NaN, 3, NaN, NA, 4)
3737
nafill(x, "locf")
3838
nafill(x, "locf", nan=NaN)
3939

40+
# works for factors
41+
x = gl(3, 2, 10)
42+
is.na(x) = 1:2
43+
nafill(x, "nocb")
44+
4045
# fill= applies to any leftover NA
4146
nafill(c(NA, x), "locf")
4247
nafill(c(NA, x), "locf", fill=0)

src/nafill.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
113113
if (obj_scalar) {
114114
if (binplace)
115115
error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table"));
116-
else if (!isReal(obj) && !isInteger(obj) && !isLogical(obj))
116+
else if (!isReal(obj) && TYPEOF(obj) != INTSXP && !isLogical(obj))
117117
error(_("'x' argument must be logical/numeric type, or list/data.table of logical/numeric types"));
118118
SEXP obj1 = obj;
119119
obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list
@@ -124,7 +124,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
124124
int *icols = INTEGER(ricols);
125125
for (int i=0; i<length(ricols); i++) {
126126
SEXP this_col = VECTOR_ELT(obj, icols[i]-1);
127-
if (!isReal(this_col) && !isInteger(this_col) && !isLogical(this_col))
127+
if (!isReal(this_col) && TYPEOF(this_col) != INTSXP && !isLogical(this_col))
128128
error(_("'x' argument must be logical/numeric type, or list/data.table of logical/numeric types"));
129129
SET_VECTOR_ELT(x, i, this_col);
130130
}
@@ -218,8 +218,17 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
218218

219219
if (!binplace) {
220220
for (R_len_t i=0; i<nx; i++) {
221-
if (ANY_ATTRIB(VECTOR_ELT(x, i)))
222-
copyMostAttrib(VECTOR_ELT(x, i), VECTOR_ELT(ans, i));
221+
SEXP xi = VECTOR_ELT(x, i);
222+
if (ANY_ATTRIB(xi)) {
223+
copyMostAttrib(xi, VECTOR_ELT(ans, i));
224+
if (itype == 0 && hasFill && isFactor(xi)) {
225+
SEXP fillLev = PROTECT(getAttrib(VECTOR_ELT(fill, i), R_LevelsSymbol));
226+
if (!R_compute_identical(PROTECT(getAttrib(xi, R_LevelsSymbol)), fillLev, 0)) {
227+
setAttrib(VECTOR_ELT(ans, i), R_LevelsSymbol, fillLev);
228+
}
229+
UNPROTECT(2);
230+
}
231+
}
223232
}
224233
SEXP obj_names = getAttrib(obj, R_NamesSymbol); // copy names
225234
if (!isNull(obj_names)) {

0 commit comments

Comments
 (0)