Skip to content

Commit 4acabf0

Browse files
committed
only coerce UTF8 factors if needed
1 parent e5e10a0 commit 4acabf0

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

inst/tests/tests.Rraw

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21265,6 +21265,9 @@ DT[1, V1 := samelevel]
2126521265
test(2311.1, nlevels(DT$V1), 1L) # used to be 2
2126621266
DT[1, V1 := factor("a", levels = c("a", samelevel))]
2126721267
test(2311.2, nlevels(DT$V1), 2L) # used to be 3
21268+
# runs instantly without the regression but takes ages with #7404
21269+
dt = data.table(id = as.factor(rep(seq_len(1e6), each = 100)), V1 = 1)
21270+
test(2311.3, dt[, base::max(V1, na.rm = TRUE), by = id], dt[, max(V1), by = id])
2126821271

2126921272
# avoid translateChar*() in OpenMP threads, #6883
2127021273
DF = list(rep(iconv("\uf8", from = "UTF-8", to = "latin1"), 2e6))

src/assign.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -755,10 +755,14 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
755755
SEXP targetLevels = PROTECT(getAttrib(target, R_LevelsSymbol)); protecti++;
756756
SEXP sourceLevels = source; // character source
757757
if (sourceIsFactor) { sourceLevels=PROTECT(getAttrib(source, R_LevelsSymbol)); protecti++; }
758-
sourceLevels = PROTECT(coerceUtf8IfNeeded(sourceLevels)); protecti++;
759-
if (!sourceIsFactor || !R_compute_identical(sourceLevels, targetLevels, 0)) { // !sourceIsFactor for test 2115.6
760-
const int nTargetLevels=length(targetLevels), nSourceLevels=length(sourceLevels);
758+
bool needUtf8Coerce = !sourceIsFactor || !R_compute_identical(sourceLevels, targetLevels, 0);
759+
if (needUtf8Coerce) {
760+
sourceLevels = PROTECT(coerceUtf8IfNeeded(sourceLevels)); protecti++;
761761
targetLevels = PROTECT(coerceUtf8IfNeeded(targetLevels)); protecti++;
762+
if (sourceIsFactor && R_compute_identical(sourceLevels, targetLevels, 0)) needUtf8Coerce = false;
763+
}
764+
if (needUtf8Coerce) {
765+
const int nTargetLevels=length(targetLevels), nSourceLevels=length(sourceLevels);
762766
const SEXP *targetLevelsD=STRING_PTR_RO(targetLevels), *sourceLevelsD=STRING_PTR_RO(sourceLevels);
763767
SEXP newSource = PROTECT(allocVector(INTSXP, length(source))); protecti++;
764768
savetl_init();

0 commit comments

Comments
 (0)