Skip to content

Commit 923fbcc

Browse files
author
maechler
committed
re-enable match(<Date>, <char>) (wrt 88356)
git-svn-id: https://svn.r-project.org/R/trunk@88363 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent 8bc5a24 commit 923fbcc

File tree

4 files changed

+84
-30
lines changed

4 files changed

+84
-30
lines changed

doc/NEWS.Rd

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,17 @@
144144
\PR{18885} (differently than the fix for \PR{15770}), with thanks to
145145
\I{Ivan Krylov}.
146146

147-
\item \code{t.test(c(1:3, Inf))} and similar no longer produce an error but
148-
return a (still not so useful) \code{"htest"} result, fixing
149-
\PR{18901}, thanks to \I{Jesse Alderliesten}.
150-
151147
\item \code{text()} now truncates \code{labels} to maximum length
152148
of \code{x} and \code{y} (if it is longer), fixing \PR{7084}.
153149
Thanks to \I{Heather Turner}, \I{Ella Kaye}, and
154150
\I{Philippe Grosjean}.
155151

156-
\item \code{attr(., "tsp") <- val} now uses \code{getOption("ts.eps")}
157-
instead of hardwired \code{1e-5}; consequently, \code{ts(.., ts.eps=*)}
158-
now passes \code{ts.eps} to the \code{"tsp"} setting C code;
159-
both fixing a long-standing \sQuote{\I{FIXME}}.
160-
161152
\item \code{<Date> \%in\% set} has become as fast again, as it was
162153
before \R 4.3.0, via new S3 method \code{mtfrm.Date}.
154+
Additionally, \code{<character> \%in\% <Date>} and vice versa are
155+
documented to work in concordance with \code{==} and as an exception
156+
to the typical \code{match()} behaviour which relies on
157+
\dQuote{univariate} \code{mtfrm()} alone.
163158
}
164159
}
165160
}
@@ -176,7 +171,14 @@
176171

177172
\subsection{BUG FIXES}{
178173
\itemize{
179-
\item .
174+
\item \code{t.test(c(1:3, Inf))} and similar no longer produce an error but
175+
return a (still not so useful) \code{"htest"} result, fixing
176+
\PR{18901}, thanks to \I{Jesse Alderliesten}.
177+
178+
\item \code{attr(., "tsp") <- val} now uses \code{getOption("ts.eps")}
179+
instead of hardwired \code{1e-5}; consequently, \code{ts(.., ts.eps=*)}
180+
now passes \code{ts.eps} to the \code{"tsp"} setting C code;
181+
both fixing a long-standing \sQuote{\I{FIXME}}.
180182
}
181183
}
182184
}

src/library/base/man/match.Rd

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
% File src/library/base/man/match.Rd
22
% Part of the R package, https://www.R-project.org
3-
% Copyright 1995-2017 R Core Team
3+
% Copyright 1995-2025 R Core Team
44
% Distributed under GPL 2 or later
55

66
\name{match}
@@ -58,6 +58,12 @@ x \%in\% table
5858
complex < character) before matching. If \code{incomparables} has
5959
positive length it is coerced to the common type.
6060
61+
One exception to the above happens when \code{match()}ing
62+
\code{\link{Date}} objects and \code{\link{character}}s. There, the
63+
character argument is coerced to \code{"Date"} (via
64+
\code{\link{as.Date.character}()}) before the above \code{mtfrm()} is
65+
applied.
66+
6167
Matching for lists is potentially very slow and best avoided except in
6268
simple cases.
6369

src/main/unique.c

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,7 @@ static Rboolean duplicatedInit(SEXP x, HashData *d)
599599
} else if (TYPEOF(x) == CLOSXP) {
600600
if (duplicatedInit(BODY_EXPR(x), d))
601601
stop = TRUE;
602-
}
602+
}
603603
return stop;
604604
}
605605

@@ -801,7 +801,7 @@ static SEXP sorted_Duplicated(SEXP x, Rboolean from_last, int nmax)
801801
SORTED_DUP_NANS(PARTIAL, 0, i < nb, i++);
802802
} // from_last
803803
} // numnas > 0
804-
804+
805805
if(numnas < n) {
806806
startpos = nas1st ? numnas : 0;
807807
SORTED_DUP_NONNANS(startpos, n - numnas - 1, rtmp, double, REAL);
@@ -818,7 +818,7 @@ static SEXP sorted_Duplicated(SEXP x, Rboolean from_last, int nmax)
818818
#undef SORTED_DUP_NANS
819819
#undef DUP_DO_ONE
820820

821-
/* to add sorted fastpass support for new SEXP types modify sorted_Duplicated
821+
/* to add sorted fastpass support for new SEXP types modify sorted_Duplicated
822822
and sorted_any_Duplicated then add them here */
823823
#define DUP_KNOWN_SORTED(x) \
824824
((TYPEOF(x) == INTSXP && KNOWN_SORTED(INTEGER_IS_SORTED(x))) || \
@@ -835,7 +835,7 @@ static SEXP Duplicated(SEXP x, Rboolean from_last, int nmax)
835835
return allocVector(LGLSXP, 0);
836836
else if (n == 1)
837837
return ScalarLogical(FALSE);
838-
838+
839839
if(DUP_KNOWN_SORTED(x)) {
840840
return sorted_Duplicated(x, from_last, nmax);
841841
}
@@ -866,7 +866,7 @@ attribute_hidden R_xlen_t sorted_any_duplicated(SEXP x, Rboolean from_last) {
866866
int itmp, sorted;
867867
double rtmp;
868868
Rboolean seen_na = FALSE, seen_nan = FALSE, na1st = FALSE;
869-
869+
870870
#define SORTED_ANYDUP_NONNANS_FROM_LAST(start, count, tmpvar, eetype, vvtype) do { \
871871
if (count > 1) { \
872872
tmpvar = vvtype##_ELT(x, start + count - 1); \
@@ -944,7 +944,7 @@ attribute_hidden R_xlen_t sorted_any_duplicated(SEXP x, Rboolean from_last) {
944944
R_xlen_t numnas = sorted_real_count_NANs(x), napivot;
945945
napivot = XLENGTH(x) - numnas;
946946
na1st = KNOWN_NA_1ST(sorted);
947-
947+
948948
if(from_last) {
949949
if(na1st) {
950950
SORTED_ANYDUP_NONNANS_FROM_LAST(numnas, napivot, rtmp, double,
@@ -1112,7 +1112,7 @@ attribute_hidden SEXP do_duplicated(SEXP call, SEXP op, SEXP args, SEXP env)
11121112

11131113
/* handle zero length vectors, and NULL */
11141114
R_xlen_t n = xlength(x);
1115-
if (n == 0)
1115+
if (n == 0)
11161116
return(PRIMVAL(op) <= 1
11171117
? allocVector(PRIMVAL(op) != 1 ? LGLSXP : TYPEOF(x), 0)
11181118
: ScalarInteger(0));
@@ -1161,7 +1161,7 @@ attribute_hidden SEXP do_duplicated(SEXP call, SEXP op, SEXP args, SEXP env)
11611161
for(R_xlen_t j=0; j < nb; j++)
11621162
if(duptr[j] == 0) k++;
11631163
});
1164-
1164+
11651165
SEXP ans = PROTECT(allocVector(TYPEOF(x), k));
11661166

11671167
k = 0;
@@ -1291,7 +1291,7 @@ static SEXP match_transform(SEXP s, SEXP env)
12911291
if(inherits(s, "factor")) return asCharacterFactor(s);
12921292
/*
12931293
else if(inherits(s, "POSIXlt")) { // and maybe more classes in the future:
1294-
// Call R's (generic) as.character(s):
1294+
// Call R's (generic) as.character(s):
12951295
SEXP call, r;
12961296
PROTECT(call = lang2(R_AsCharacterSymbol, s));
12971297
r = eval(call, env);
@@ -1334,7 +1334,7 @@ static SEXP asUTF8(SEXP x)
13341334
} else
13351335
return x;
13361336
}
1337-
1337+
13381338
// workhorse of R's match() and hence also " ix %in% itable "
13391339
static /* or attribute_hidden? */
13401340
SEXP match5(SEXP itable, SEXP ix, int nmatch, SEXP incomp, SEXP env)
@@ -1351,10 +1351,37 @@ SEXP match5(SEXP itable, SEXP ix, int nmatch, SEXP incomp, SEXP env)
13511351
return ans;
13521352
}
13531353

1354-
int nprot = 0;
1355-
SEXP x = PROTECT(match_transform(ix, env)); nprot++;
1356-
SEXP table = PROTECT(match_transform(itable, env)); nprot++;
1357-
/* or should we use PROTECT_WITH_INDEX and REPROTECT below ? */
1354+
SEXP x, table;
1355+
int nprot = 2; /* x, table */
1356+
PROTECT_INDEX xpi, tbpi;
1357+
1358+
bool D1; /* special case <Date> o <character> */
1359+
if ((D1 = isObject(ix) && inherits(ix, "Date") && isValidString(itable)) ||
1360+
( isObject(itable) && inherits(itable, "Date") && isValidString(ix))) {
1361+
/* Do *not* translate the <Date> to integer below (which later would be coerced
1362+
* to character: e.g, as.character(as.vector(as.Date("2025-06-26"))) |--> "20265"
1363+
* but rather *do* as.Date(<character>) for the other, and then compare (the numbers of)
1364+
* as.vector(<Date>).
1365+
*/
1366+
SEXP call, form_Ymd = PROTECT(mkString("%Y-%m-%d"));
1367+
nprot += 2; /* form_Ymd, call */
1368+
if(D1) { // table := as.Date.character(itable, "%Y-%m-%d")
1369+
PROTECT(call = lang3(install("as.Date.character"), itable, form_Ymd));
1370+
PROTECT_WITH_INDEX(table = eval(call, env), &tbpi);
1371+
1372+
REPROTECT( table = match_transform(table, env), tbpi);
1373+
PROTECT_WITH_INDEX(x = match_transform(ix, env), &xpi);
1374+
} else { // x := as.Date.character(ix, "%Y-%m-%d")
1375+
PROTECT(call = lang3(install("as.Date.character"), ix, form_Ymd));
1376+
PROTECT_WITH_INDEX(x = eval(call, env), &xpi);
1377+
1378+
REPROTECT( x = match_transform(x, env), xpi);
1379+
PROTECT_WITH_INDEX(table = match_transform(itable, env), &tbpi);
1380+
}
1381+
} else { /* regular cases */
1382+
PROTECT_WITH_INDEX(x = match_transform(ix, env), &xpi);
1383+
PROTECT_WITH_INDEX(table = match_transform(itable, env), &tbpi);
1384+
}
13581385

13591386
SEXPTYPE type;
13601387
/* Coerce to a common type; type == NILSXP is ok here.
@@ -1363,8 +1390,8 @@ SEXP match5(SEXP itable, SEXP ix, int nmatch, SEXP incomp, SEXP env)
13631390
* (given that we have "Vector" or NULL) */
13641391
if(TYPEOF(x) >= STRSXP || TYPEOF(table) >= STRSXP) type = STRSXP;
13651392
else type = TYPEOF(x) < TYPEOF(table) ? TYPEOF(table) : TYPEOF(x);
1366-
PROTECT(x = coerceVector(x, type)); nprot++;
1367-
PROTECT(table = coerceVector(table, type)); nprot++;
1393+
REPROTECT(x = coerceVector(x, type), xpi);
1394+
REPROTECT(table = coerceVector(table, type), tbpi);
13681395

13691396
// special case scalar x -- for speed only :
13701397
if(XLENGTH(x) == 1 && !incomp) {
@@ -2406,7 +2433,7 @@ static void rehash(R_hashtab_type h, int resize)
24062433

24072434
HT_COUNT(h) = 0;
24082435
HT_VALIDATE(h);
2409-
2436+
24102437
SET_HT_TABLE(h, allocVector(VECSXP, new_size));
24112438
if (resize) HT_TABLE_K(h)++;
24122439

@@ -2436,7 +2463,7 @@ static SEXP getcell(R_hashtab_type h, SEXP key, int *pidx)
24362463
chain = CDR(chain);
24372464
}
24382465
return R_NilValue;
2439-
}
2466+
}
24402467

24412468

24422469
/*
@@ -2557,7 +2584,7 @@ SEXP R_maphash(R_hashtab_type h, SEXP FUN)
25572584
SEXP env = PROTECT(R_NewEnv(R_GlobalEnv, FALSE, 0));
25582585
SEXP call = PROTECT(lang3(FUN_sym, key_sym, val_sym));
25592586
defvar(FUN_sym, FUN, env);
2560-
2587+
25612588
SEXP table = PROTECT(HT_TABLE(h)); // PROTECT in case FUN causes a rehash
25622589
int size = LENGTH(table);
25632590
for (int i = 0; i < size; i++) {

tests/reg-tests-1e.R

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2002,6 +2002,25 @@ assertErrV( ts(1:711, frequency=2*pi, start = 1, end = 114, ts.eps = 1e-6) )
20022002
## did *not* error in R <= 4.5.1, as 'ts.eps' was *not* passed to C code
20032003

20042004

2005+
## match(<Date>, <character>) and vice versa
2006+
date_seq <- seq(as.Date("1705-01-01"), as.Date("2024-12-31"), by="days")
2007+
dt1 <- as.Date("2024-05-01")
2008+
dt3 <- c(dt1, as.Date(c("1800-01-01", "2025-02-02")))
2009+
system.time({
2010+
tmp <- dt1 %in% date_seq
2011+
tm3 <- dt3 %in% date_seq
2012+
})# 0.260 in R 4.3.0 ff
2013+
## 0.003 or so after fixing
2014+
stopifnot(tmp, identical(tm3, c(TRUE, TRUE, FALSE)))
2015+
## The 1-1 case (fast branch in C's match5()):
2016+
ch <- "2025-05-05" ; D <- as.Date(ch)
2017+
c2 <- "1925-05-05"
2018+
c(cDT = ch %in% D, DcT = D %in% ch,
2019+
cDF = c2 %in% D, DcF = D %in% c2)
2020+
stopifnot(ch %in% D, D %in% ch, !(c2 %in% D), !(D %in% c2))
2021+
## had failed in R-devel around 2025-06-26 (and before R 4.3.0)
2022+
2023+
20052024

20062025
## keep at end
20072026
rbind(last = proc.time() - .pt,

0 commit comments

Comments
 (0)