Skip to content

Commit 9689f9e

Browse files
authored
Implementation of %notin% (#4931)
1 parent 8edbd67 commit 9689f9e

File tree

10 files changed

+88
-8
lines changed

10 files changed

+88
-8
lines changed

.gitlab-ci.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,15 @@ build: ## build data.table sources as tar.gz archive
9494
- mkdir.exe -p cran/bin/windows/contrib/$R_VERSION; mv.exe $(ls.exe -1t data.table_*.zip | head.exe -n 1) cran/bin/windows/contrib/$R_VERSION
9595

9696
.test-install-r-rel-win: &install-r-rel-win
97-
- curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/R-4.1.3-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait
97+
- curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/R-4.2.1-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait
9898
# see #5198 for discussion about the https link used above; it will break each time R is released and the version number will need to be updated
9999
.test-install-r-devel-win: &install-r-devel-win
100100
- curl.exe -s -o ../R-devel.exe https://cloud.r-project.org/bin/windows/base/R-devel-win.exe; Start-Process -FilePath ..\R-devel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait
101101
.test-install-r-oldrel-win: &install-r-oldrel-win
102-
- curl.exe -s -o ../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/4.0.5/R-4.0.5-win.exe; Start-Process -FilePath ..\R-oldrel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait
102+
- curl.exe -s -o ../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/4.1.3/R-4.1.3-win.exe; Start-Process -FilePath ..\R-oldrel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait
103103

104104
.test-install-rtools-win: &install-rtools-win
105-
- curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools40-x86_64.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools40" -NoNewWindow -Wait
105+
- curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools42/files/rtools42-5253-5107-signed.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools42" -NoNewWindow -Wait
106106

107107
.test-template: &test
108108
stage: test
@@ -246,7 +246,7 @@ test-rel-win: ## R-release on Windows, test and build binaries
246246
before_script:
247247
- *install-r-rel-win
248248
- *install-rtools-win
249-
- $ENV:PATH = "C:\R\bin;C:\rtools40\usr\bin;$ENV:PATH"
249+
- $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH"
250250
- Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most'), quiet=TRUE)"
251251
- *cp-src-win
252252
- rm.exe -r bus
@@ -264,7 +264,7 @@ test-dev-win: ## R-devel on Windows; see #5294 for changes in Dec 2021 related t
264264
R_VERSION: "$R_DEVEL_VERSION"
265265
before_script:
266266
- *install-r-devel-win
267-
- curl.exe -s -o ../rtools.exe https://www.r-project.org/nosvn/winutf8/ucrt3/rtools42-5038-5046.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools42" -NoNewWindow -Wait
267+
- *install-rtools-win
268268
- $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH"
269269
- Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most', exclude=c('knitr','rmarkdown')), quiet=TRUE)" ## exclude= for #5294
270270
- *cp-src-win
@@ -284,7 +284,7 @@ test-old-win: ## R-oldrel on Windows
284284
before_script:
285285
- *install-r-oldrel-win
286286
- *install-rtools-win
287-
- $ENV:PATH = "C:\R\bin;C:\rtools40\usr\bin;$ENV:PATH"
287+
- $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH"
288288
- Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most', exclude=c('knitr','rmarkdown')), quiet=TRUE)" ## exclude= for #5294
289289
- *cp-src-win
290290
- rm.exe -r bus

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ Authors@R: c(
7171
person("Boniface Christian","Kamgang", role="ctb"),
7272
person("Olivier","Delmarcell", role="ctb"),
7373
person("Josh","O'Brien", role="ctb"),
74-
person("Dereck","de Mezquita", role="ctb"))
74+
person("Dereck","de Mezquita", role="ctb"),
75+
person("Michael","Czekanski", role="ctb"))
7576
Depends: R (>= 3.1.0)
7677
Imports: methods
7778
Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown

NAMESPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ exportClasses(data.table, IDate, ITime)
88
export(data.table, tables, setkey, setkeyv, key, "key<-", haskey, CJ, SJ, copy)
99
export(setindex, setindexv, indices)
1010
export(as.data.table,is.data.table,test.data.table)
11-
export(last,first,like,"%like%","%ilike%","%flike%","%plike%",between,"%between%",inrange,"%inrange%")
11+
export(last,first,like,"%like%","%ilike%","%flike%","%plike%",between,"%between%",inrange,"%inrange%", "%notin%")
1212
export(timetaken)
1313
export(truelength, setalloccol, alloc.col, ":=", let)
1414
export(setattr, setnames, setcolorder, set, setDT, setDF)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,8 @@
294294

295295
40. New functions `yearmon()` and `yearqtr` give a combined representation of `year()` and `month()`/`quarter()`. These and also `yday`, `wday`, `mday`, `week`, `month` and `year` are now optimized for memory and compute efficiency by removing the `POSIXlt` dependency, [#649](https://github.com/Rdatatable/data.table/issues/649). Thanks to Matt Dowle for the request, and Benjamin Schwendinger for the PR.
296296

297+
41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error.
298+
297299
## BUG FIXES
298300

299301
1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.

R/notin.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
# Logical inverse of %in%: TRUE for each element of `x` that has no match in `table`.
"%notin%" = function(x, table) {
  # Character-vs-character input is handed to the C routine registered as Cnotchin.
  if (is.character(x) && is.character(table)) return(.Call(Cnotchin, x, table))
  # Any other input: match() yields 0L for unmatched elements, so comparing
  # with 0L gives TRUE exactly where `x` is absent from `table`.
  match(x, table, nomatch = 0L) == 0L
}

inst/tests/tests.Rraw

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18804,3 +18804,13 @@ test(2237.1, as.data.frame(dt, row.names=c("x", "y")), df)
1880418804
df = data.frame(a=1:2, b=3:4)
1880518805
test(2237.2, as.data.frame(dt, row.names=NULL), df)
1880618806

18807+
# Test new feature %notin%, #4152
18808+
test(2238.1, 11 %notin% 1:10, TRUE)
18809+
test(2238.2, "a" %notin% c(), TRUE)
18810+
test(2238.3, "a" %notin% c("a", "b", "c"), FALSE)
18811+
test(2238.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE))
18812+
test(2238.5, "a" %notin% character(), TRUE)
18813+
test(2238.6, "a" %notin% integer(), TRUE)
18814+
test(2238.7, "a" %notin% NULL, TRUE)
18815+
test(2238.8, NA %notin% 1:5, TRUE)
18816+
test(2238.9, NA %notin% c(1:5, NA), FALSE)

man/notin.Rd

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
\name{notin}
2+
\alias{\%notin\%}
3+
4+
\title{
5+
Convenience operator for checking if an element is not in a set of elements
6+
}
7+
8+
\description{
9+
Check whether an object is absent from a table, i.e., the logical inverse of \code{\link[base:match]{\%in\%}}.
10+
}
11+
12+
\usage{
13+
x \%notin\% table
14+
}
15+
16+
\arguments{
17+
\item{x}{ Vector or \code{NULL}: the values to be matched. }
18+
\item{table}{ Vector or \code{NULL}: the values to be matched against. }
19+
}
20+
21+
22+
\value{
23+
Logical vector, \code{TRUE} for each element of \code{x} \emph{absent} from \code{table}, and \code{FALSE} for each element of \code{x} \emph{present} in \code{table}.
24+
}
25+
26+
\seealso{ \code{\link[base]{match}}, \code{\link[data.table]{chmatch}} }
27+
28+
29+
\examples{
30+
11 \%notin\% 1:10 # TRUE
31+
"a" \%notin\% c("a", "b") # FALSE
32+
}
33+

src/data.table.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,3 +259,6 @@ int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...);
259259

260260
// programming.c
261261
SEXP substitute_call_arg_namesR(SEXP expr, SEXP env);
262+
263+
//negate.c
264+
SEXP notchin(SEXP x, SEXP table);

src/init.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ SEXP test_dt_win_snprintf();
131131
SEXP dt_zlib_version();
132132
SEXP startsWithAny();
133133
SEXP convertDate();
134+
SEXP notchin();
134135

135136
// .Externals
136137
SEXP fastmean();
@@ -230,6 +231,7 @@ R_CallMethodDef callMethods[] = {
230231
{"Csubstitute_call_arg_namesR", (DL_FUNC) &substitute_call_arg_namesR, -1},
231232
{"CstartsWithAny", (DL_FUNC)&startsWithAny, -1},
232233
{"CconvertDate", (DL_FUNC)&convertDate, -1},
234+
{"Cnotchin", (DL_FUNC)&notchin, -1},
233235
{NULL, NULL, 0}
234236
};
235237

src/negate.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#include "data.table.h"
2+
3+
// Negate a logical vector in place: TRUE becomes FALSE, FALSE becomes TRUE,
// and NA elements are left unchanged. No new vector is allocated.
//   x: must be a logical vector (LGLSXP); any other type raises an R error.
void negateByRef(SEXP x) {
  if (TYPEOF(x) != LGLSXP) {
    error("negateByRef: x must be a logical vector"); // # nocov
  }
  const int n = length(x);
  // Work on the int* that LOGICAL() returns rather than casting to Rboolean*:
  // the elements are plain ints and NA_LOGICAL is not an Rboolean value.
  int *xd = LOGICAL(x);
  for (int i=0; i<n; ++i) {
    // (xd[i] != NA_LOGICAL) is 1 for TRUE/FALSE and 0 for NA, so the XOR
    // flips the low bit of non-NA elements and leaves NA alone
    xd[i] ^= (xd[i] != NA_LOGICAL);
  }
}
13+
14+
15+
// C entry point behind the character branch of %notin%: computes chin(x, table)
// and negates that result in place instead of allocating a second vector.
// See discussion in PR#4931.
SEXP notchin(SEXP x, SEXP table) {
  SEXP ans = chin(x, table);
  PROTECT(ans);       // keep the chin() result protected while it is negated
  negateByRef(ans);   // in-place negation saves memory
  UNPROTECT(1);
  return ans;
}
22+

0 commit comments

Comments
 (0)