Skip to content

Commit 2c7a646

Browse files
author
maechler
committed
allow slow pmatch()ing to be interrupted
git-svn-id: https://svn.r-project.org/R/trunk@85816 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent e1e7acb commit 2c7a646

File tree

2 files changed

+28
-26
lines changed

2 files changed

+28
-26
lines changed

doc/NEWS.Rd

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,10 @@
559559

560560
\item \pkg{tools}\code{:: startDynamicHelp()} now ensures \code{port}
561561
is in proper range, fixing \PR{18645}.
562+
563+
\item \code{pmatch(x, table)} for large \code{table}, also called for
564+
data frame row selection, \code{dfrm[nm, ]}, is now interruptable,
565+
fixing \PR{18656}.
562566
}
563567
}
564568
}

src/main/unique.c

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* R : A Computer Language for Statistical Data Analysis
3-
* Copyright (C) 1997--2023 The R Core Team
3+
* Copyright (C) 1997--2024 The R Core Team
44
* Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
55
*
66
* This program is free software; you can redistribute it and/or modify
@@ -323,16 +323,14 @@ static hlen vhash(SEXP x, R_xlen_t indx, HashData *d)
323323

324324
static hlen vhash_one(SEXP _this, HashData *d)
325325
{
326-
int i;
327-
unsigned int key;
328-
329326
/* Handle environments by pointer hashing. Previously,
330327
environments were hashed based only on length, which is not
331328
very effective and could be expensive to compute. */
332329
if (TYPEOF(_this) == ENVSXP)
333330
return scatter(PTRHASH(_this), d);
334331

335-
key = OBJECT(_this) + 2*TYPEOF(_this) + 100U*(unsigned int) length(_this);
332+
int i;
333+
unsigned int key = OBJECT(_this) + 2*TYPEOF(_this) + 100U*(unsigned int) length(_this);
336334
/* maybe we should also look at attributes, but that slows us down */
337335
switch (TYPEOF(_this)) {
338336
case LGLSXP:
@@ -1497,8 +1495,8 @@ attribute_hidden SEXP do_match(SEXP call, SEXP op, SEXP args, SEXP env)
14971495
{
14981496
checkArity(op, args);
14991497

1500-
if ((!isVector(CAR(args)) && !isNull(CAR(args)))
1501-
|| (!isVector(CADR(args)) && !isNull(CADR(args))))
1498+
if ((!isVector(CAR (args)) && !isNull(CAR (args))) ||
1499+
(!isVector(CADR(args)) && !isNull(CADR(args))))
15021500
error(_("'match' requires vector arguments"));
15031501

15041502
int nomatch = asInteger(CADDR(args));
@@ -1526,35 +1524,32 @@ attribute_hidden SEXP do_match(SEXP call, SEXP op, SEXP args, SEXP env)
15261524
* Empty strings are unmatched BDR 2000/2/16
15271525
*/
15281526

1527+
// .Internal(pmatch(x, table, nomatch, duplicates.ok))
15291528
attribute_hidden SEXP do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env)
15301529
{
1531-
SEXP ans, input, target;
1532-
int mtch, n_target, mtch_count, dups_ok, no_match;
1533-
size_t temp;
1534-
int *used = NULL, *ians;
1535-
const char **in, **tar;
1536-
Rboolean no_dups;
1537-
Rboolean useBytes = FALSE, useUTF8 = FALSE;
1538-
15391530
checkArity(op, args);
1540-
input = CAR(args);
1531+
SEXP input = CAR(args), // = x in R
1532+
target = CADR(args), // = table "
1533+
ans;
15411534
R_xlen_t n_input = XLENGTH(input);
1542-
target = CADR(args);
1543-
n_target = LENGTH(target); // not allowed to be long
1544-
no_match = asInteger(CADDR(args));
1545-
dups_ok = asLogical(CADDDR(args));
1535+
1536+
int n_target = LENGTH(target), // not allowed to be long
1537+
no_match = asInteger(CADDR(args)),
1538+
dups_ok = asLogical(CADDDR(args));
15461539
if (dups_ok == NA_LOGICAL)
15471540
error(_("invalid '%s' argument"), "duplicates.ok");
1548-
no_dups = !dups_ok;
1541+
Rboolean no_dups = !dups_ok;
15491542

15501543
if (!isString(input) || !isString(target))
15511544
error(_("argument is not of mode character"));
15521545

1546+
int *used = NULL;
15531547
if(no_dups) {
15541548
used = (int *) R_alloc((size_t) n_target, sizeof(int));
15551549
for (int j = 0; j < n_target; j++) used[j] = 0;
15561550
}
15571551

1552+
Rboolean useBytes = FALSE, useUTF8 = FALSE;
15581553
for(R_xlen_t i = 0; i < n_input; i++) {
15591554
if(IS_BYTES(STRING_ELT(input, i))) {
15601555
useBytes = TRUE;
@@ -1576,10 +1571,11 @@ attribute_hidden SEXP do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env)
15761571
}
15771572
}
15781573

1579-
in = (const char **) R_alloc((size_t) n_input, sizeof(char *));
1574+
const char **in, **tar;
1575+
in = (const char **) R_alloc((size_t) n_input, sizeof(char *));
15801576
tar = (const char **) R_alloc((size_t) n_target, sizeof(char *));
15811577
PROTECT(ans = allocVector(INTSXP, n_input));
1582-
ians = INTEGER0(ans);
1578+
int *ians = INTEGER0(ans);
15831579
if(useBytes) {
15841580
for(R_xlen_t i = 0; i < n_input; i++) {
15851581
in[i] = CHAR(STRING_ELT(input, i));
@@ -1643,11 +1639,13 @@ attribute_hidden SEXP do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env)
16431639
const char *ss;
16441640
if (ians[i]) continue;
16451641
ss = in[i];
1646-
temp = strlen(ss);
1642+
size_t temp = strlen(ss);
16471643
if (temp == 0) continue;
1648-
mtch = 0;
1649-
mtch_count = 0;
1644+
int mtch = 0,
1645+
mtch_count = 0;
16501646
for (int j = 0; j < n_target; j++) {
1647+
if (!(((size_t)i * n_target + j) & 0x1fff))
1648+
R_CheckUserInterrupt();
16511649
if (no_dups && used[j]) continue;
16521650
if (strncmp(ss, tar[j], temp) == 0) {
16531651
mtch = j + 1;

0 commit comments

Comments
 (0)