Skip to content

Commit 03fc49d

Browse files
committed
Performance boost by replacing TRUELENGTH with STDVEC_TRUELENGTH for strings (there are no ALTREP strings, and the algorithms wouldn't work with them anyway).
1 parent 5537ba7 commit 03fc49d

File tree

2 files changed

+13
-13
lines changed

2 files changed

+13
-13
lines changed

src/chmatch.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
5858
savetl_init();
5959
for (int i=0; i<xlen; i++) {
6060
SEXP s = xd[i];
61-
const int tl = TRULEN(s);
61+
const int tl = STDVEC_TRUELENGTH(s);
6262
if (tl>0) {
6363
savetl(s); // R's internal hash (which is positive); save it
6464
SET_TRULEN(s,0);
@@ -75,7 +75,7 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
7575
int nuniq=0;
7676
for (int i=0; i<tablelen; ++i) {
7777
const SEXP s = td[i];
78-
int tl = TRULEN(s);
78+
int tl = STDVEC_TRUELENGTH(s);
7979
if (tl>0) { savetl(s); tl=0; }
8080
if (tl==0) SET_TRULEN(s, chmatchdup ? -(++nuniq) : -i-1); // first time seen this string in table
8181
}
@@ -105,12 +105,12 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
105105
error(_("Failed to allocate %"PRIu64" bytes working memory in chmatchdup: length(table)=%d length(unique(table))=%d"), ((uint64_t)tablelen*2+nuniq)*sizeof(int), tablelen, nuniq);
106106
// # nocov end
107107
}
108-
for (int i=0; i<tablelen; ++i) counts[-TRULEN(td[i])-1]++;
108+
for (int i=0; i<tablelen; ++i) counts[-STDVEC_TRUELENGTH(td[i])-1]++;
109109
for (int i=0, sum=0; i<nuniq; ++i) { int tt=counts[i]; counts[i]=sum; sum+=tt+1; }
110-
for (int i=0; i<tablelen; ++i) map[counts[-TRULEN(td[i])-1]++] = i+1; // 0 is left ending each group thanks to the calloc
110+
for (int i=0; i<tablelen; ++i) map[counts[-STDVEC_TRUELENGTH(td[i])-1]++] = i+1; // 0 is left ending each group thanks to the calloc
111111
for (int i=0, last=0; i<nuniq; ++i) {int tt=counts[i]+1; counts[i]=last; last=tt;} // rewind counts to the beginning of each group
112112
for (int i=0; i<xlen; ++i) {
113-
int u = TRULEN(xd[i]);
113+
int u = STDVEC_TRUELENGTH(xd[i]);
114114
if (u<0) {
115115
const int w = counts[-u-1]++;
116116
if (map[w]) { ansd[i]=map[w]; continue; }
@@ -123,11 +123,11 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
123123
free(map);
124124
} else if (chin) {
125125
for (int i=0; i<xlen; i++) {
126-
ansd[i] = TRULEN(xd[i])<0;
126+
ansd[i] = STDVEC_TRUELENGTH(xd[i])<0;
127127
}
128128
} else {
129129
for (int i=0; i<xlen; i++) {
130-
const int m = TRULEN(xd[i]);
130+
const int m = STDVEC_TRUELENGTH(xd[i]);
131131
ansd[i] = (m<0) ? -m : nomatch;
132132
}
133133
}

src/forder.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -307,10 +307,10 @@ static void range_str(const SEXP *x, int n, uint64_t *out_min, uint64_t *out_max
307307
na_count++;
308308
continue;
309309
}
310-
if (TRULEN(s)<0) continue; // seen this group before
310+
if (STDVEC_TRUELENGTH(s)<0) continue; // seen this group before
311311
#pragma omp critical
312-
if (TRULEN(s)>=0) { // another thread may have set it while I was waiting, so check it again
313-
if (TRULEN(s)>0) // save any of R's own usage of tl (assumed positive, so we can both count and save in one scan), to restore
312+
if (STDVEC_TRUELENGTH(s)>=0) { // another thread may have set it while I was waiting, so check it again
313+
if (STDVEC_TRUELENGTH(s)>0) // save any of R's own usage of tl (assumed positive, so we can both count and save in one scan), to restore
314314
savetl(s); // afterwards. From R 2.14.0, tl is initialized to 0, prior to that it was random so this step saved too much.
315315
// now save unique SEXP in ustr so i) we can loop through them afterwards and reset TRUELENGTH to 0 and ii) sort uniques when sorting too
316316
if (ustr_alloc<=ustr_n) {
@@ -351,7 +351,7 @@ static void range_str(const SEXP *x, int n, uint64_t *out_min, uint64_t *out_max
351351
for (int i=0; i<ustr_n; i++) {
352352
SEXP s = ustr3[i];
353353
if (LENGTH(s)>ustr_maxlen) ustr_maxlen=LENGTH(s);
354-
if (TRULEN(s)>0) savetl(s);
354+
if (STDVEC_TRUELENGTH(s)>0) savetl(s);
355355
}
356356
cradix(ustr3, ustr_n); // sort to detect possible duplicates after converting; e.g. two different non-utf8 map to the same utf8
357357
SET_TRULEN(ustr3[0], -1);
@@ -365,7 +365,7 @@ static void range_str(const SEXP *x, int n, uint64_t *out_min, uint64_t *out_max
365365
if (!tl)
366366
STOP(_("Failed to alloc tl when converting strings to UTF8")); // # nocov
367367
const SEXP *tt = STRING_PTR_RO(ustr2);
368-
for (int i=0; i<ustr_n; i++) tl[i] = TRULEN(tt[i]); // fetches the o in ustr3 into tl which is ordered by ustr
368+
for (int i=0; i<ustr_n; i++) tl[i] = STDVEC_TRUELENGTH(tt[i]); // fetches the o in ustr3 into tl which is ordered by ustr
369369
for (int i=0; i<ustr_n; i++) SET_TRULEN(ustr3[i], 0); // reset to 0 tl of the UTF8 (and possibly non-UTF in ustr too)
370370
for (int i=0; i<ustr_n; i++) SET_TRULEN(ustr[i], tl[i]); // put back the o into ustr's tl
371371
free(tl);
@@ -766,7 +766,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA
766766
if (nalast==-1) anso[i]=0;
767767
elem = naval;
768768
} else {
769-
elem = -TRULEN(xd[i]);
769+
elem = -STDVEC_TRUELENGTH(xd[i]);
770770
}
771771
WRITE_KEY
772772
}}

0 commit comments

Comments
 (0)