Skip to content

Commit d3289b7

Browse files
Sugar function 'trimws' with unit tests (closes #679)
1 parent 7492cff commit d3289b7

File tree

6 files changed

+364
-0
lines changed

6 files changed

+364
-0
lines changed

ChangeLog

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
2017-04-22 Nathan Russell <[email protected]>
2+
3+
* inst/include/Rcpp/sugar/functions/strings/trimws.h: Added sugar
4+
function trimws with unit tests
5+
* inst/include/Rcpp/sugar/functions/strings/strings.h: Idem
6+
* inst/unitTests/cpp/sugar.cpp: Idem
7+
* inst/unitTests/runit.sugar.R: Idem
8+
19
2017-04-20 Dirk Eddelbuettel <[email protected]>
210

311
* DESCRIPTION (Version, Date): Roll minor version

inst/NEWS.Rd

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
(James Balamuta in \ghpr{661} addressing \ghit{628}, \ghit{563},
2626
\ghit{552}, \ghit{460}, \ghit{419}, and \ghit{251}).
2727
}
28+
\item Changes in Rcpp Sugar:
29+
\itemize{
30+
\item Added sugar function \code{trimws} (Nathan Russell in \ghpr{680}
31+
addressing \ghit{679}).
32+
}
2833
}
2934
}
3035

inst/include/Rcpp/sugar/functions/strings/strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,6 @@
2323
#define RCPP_SUGAR_FUNCTIONS_STRINGS_H
2424

2525
#include <Rcpp/sugar/functions/strings/collapse.h>
26+
#include <Rcpp/sugar/functions/strings/trimws.h>
2627

2728
#endif
inst/include/Rcpp/sugar/functions/strings/trimws.h

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
2+
//
3+
// trimws.h: Rcpp R/C++ interface class library -- trimws
4+
//
5+
// Copyright (C) 2017 Nathan Russell
6+
//
7+
// This file is part of Rcpp.
8+
//
9+
// Rcpp is free software: you can redistribute it and/or modify it
10+
// under the terms of the GNU General Public License as published by
11+
// the Free Software Foundation, either version 2 of the License, or
12+
// (at your option) any later version.
13+
//
14+
// Rcpp is distributed in the hope that it will be useful, but
15+
// WITHOUT ANY WARRANTY; without even the implied warranty of
16+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+
// GNU General Public License for more details.
18+
//
19+
// You should have received a copy of the GNU General Public License
20+
// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
21+
22+
#ifndef Rcpp__sugar__trimws_h
23+
#define Rcpp__sugar__trimws_h
24+
25+
#include <string>
26+
#include <cstring>
27+
28+
namespace Rcpp {
29+
namespace sugar {
30+
namespace detail {


/* Whitespace predicate mirroring base::trimws, which only strips
   ' ', '\t', '\r', and '\n'. std::isspace is deliberately not used
   because it would also classify '\f' and '\v' as whitespace. */
inline bool isws(const char c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}

/* Returns a copy of the NUL-terminated string `str` with leading
   whitespace (as defined by isws) removed; a null `str` yields "".

   The result lives in a function-local static buffer, so it is
   overwritten by the next call to trim_left and must be copied by the
   caller before then. */
inline const char* trim_left(const char* str) {
    static std::string buff;

    if (!str) {
        return "";
    }

    while (isws(*str)) {
        ++str;
    }

    buff.assign(str);
    return buff.c_str();
}

/* Returns a copy of the NUL-terminated string `str` with trailing
   whitespace (as defined by isws) removed; a null `str` yields "".

   The result lives in a function-local static buffer, so it is
   overwritten by the next call to trim_right and must be copied by the
   caller before then. */
inline const char* trim_right(const char* str) {
    static std::string buff;

    if (!str) {
        return "";
    }

    std::size_t len = std::strlen(str);

    // Only look at str[len - 1] while len > 0: for an empty string there
    // is no last character, and reading str[-1] would be out of bounds.
    while (len > 0 && isws(str[len - 1])) {
        --len;
    }

    buff.assign(str, len);
    return buff.c_str();
}

/* Returns a copy of the NUL-terminated string `str` with both leading
   and trailing whitespace (as defined by isws) removed; a null `str`
   yields "".

   The result lives in a function-local static buffer, so it is
   overwritten by the next call to trim_both and must be copied by the
   caller before then. */
inline const char* trim_both(const char* str) {
    static std::string buff;

    if (!str) {
        return "";
    }

    // Skip the leading run; this stops at the terminating NUL at the
    // latest because isws('\0') is false.
    while (isws(*str)) {
        ++str;
    }

    std::size_t len = std::strlen(str);

    // Same bounded backwards scan as trim_right: never touch str[-1]
    // when nothing is left after the leading trim.
    while (len > 0 && isws(str[len - 1])) {
        --len;
    }

    // Copy exactly the kept characters (no terminating NUL in the payload).
    buff.assign(str, len);
    return buff.c_str();
}


} // detail
111+
} // sugar
112+
113+
114+
/* Character-vector overload of trimws.

   Returns a new vector of the same length as `x` in which every non-NA
   element has been passed through the trimming helper selected by
   `which`; NA elements are copied through unchanged.

   Only the first character of `which` is inspected: 'b' selects
   trim_both, 'l' trim_left, and 'r' trim_right. Anything else is
   reported through stop(). */
inline Vector<STRSXP> trimws(const Vector<STRSXP>& x, const char* which = "both") {
    typedef const char* (*trim_function)(const char*);
    trim_function trim = NULL;

    switch (*which) {
    case 'b':
        trim = sugar::detail::trim_both;
        break;
    case 'l':
        trim = sugar::detail::trim_left;
        break;
    case 'r':
        trim = sugar::detail::trim_right;
        break;
    default:
        stop("Invalid `which` argument '%s'!", which);
        // Presumably never reached (stop() is expected not to return);
        // kept so every path yields a value.
        return Vector<STRSXP>::create("Unreachable");
    }

    const R_xlen_t n = x.size();
    Vector<STRSXP> res(n);

    for (R_xlen_t i = 0; i < n; ++i) {
        if (!traits::is_na<STRSXP>(x[i])) {
            res[i] = (*trim)(x[i]);
        } else {
            res[i] = x[i];
        }
    }

    return res;
}
142+
143+
/* Character-matrix overload of trimws.

   Returns a new matrix with the same number of rows and columns as `x`
   in which every non-NA element has been passed through the trimming
   helper selected by `which`; NA elements are copied through unchanged.

   Only the first character of `which` is inspected: 'b' selects
   trim_both, 'l' trim_left, and 'r' trim_right. Anything else is
   reported through stop(). */
inline Matrix<STRSXP> trimws(const Matrix<STRSXP>& x, const char* which = "both") {
    typedef const char* (*trim_function)(const char*);
    trim_function trim = NULL;

    switch (*which) {
    case 'b':
        trim = sugar::detail::trim_both;
        break;
    case 'l':
        trim = sugar::detail::trim_left;
        break;
    case 'r':
        trim = sugar::detail::trim_right;
        break;
    default:
        stop("Invalid `which` argument '%s'!", which);
        // Presumably never reached (stop() is expected not to return);
        // kept so every path yields a value.
        return Matrix<STRSXP>();
    }

    // Elements are visited through flat indexing over the whole matrix.
    const R_xlen_t n = x.size();
    Matrix<STRSXP> res(x.nrow(), x.ncol());

    for (R_xlen_t i = 0; i < n; ++i) {
        if (!traits::is_na<STRSXP>(x[i])) {
            res[i] = (*trim)(x[i]);
        } else {
            res[i] = x[i];
        }
    }

    return res;
}
171+
172+
/* Scalar (Rcpp::String) overload of trimws.

   Only the first character of `which` is inspected: 'b' selects
   trim_both, 'l' trim_left, and 'r' trim_right. Anything else is
   reported through stop(), and this check happens before `str` itself
   is looked at.

   For a valid `which`, an NA `str` is returned as a String built from
   the same SEXP; otherwise the selected helper is applied to the
   string's C representation. */
inline String trimws(const String& str, const char* which = "both") {
    typedef const char* (*trim_function)(const char*);
    trim_function trim = NULL;

    switch (*which) {
    case 'b':
        trim = sugar::detail::trim_both;
        break;
    case 'l':
        trim = sugar::detail::trim_left;
        break;
    case 'r':
        trim = sugar::detail::trim_right;
        break;
    default:
        stop("Invalid `which` argument '%s'!", which);
        // Presumably never reached (stop() is expected not to return);
        // kept so every path yields a value.
        return String("Unreachable");
    }

    if (traits::is_na<STRSXP>(str.get_sexp())) {
        return String(str.get_sexp());
    }

    return (*trim)(str.get_cstring());
}
197+
198+
199+
} // Rcpp
200+
201+
#endif // Rcpp__sugar__trimws_h

inst/unitTests/cpp/sugar.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,3 +1201,21 @@ LogicalMatrix UpperTri(NumericMatrix x, bool diag = false) {
12011201
LogicalMatrix LowerTri(NumericMatrix x, bool diag = false) {
12021202
return lower_tri(x, diag);
12031203
}
1204+
1205+
1206+
// 22 April 2017: trimws
1207+
1208+
// [[Rcpp::export]]
1209+
CharacterVector vtrimws(CharacterVector x, const char* which = "both") {
    // Test shim: forwards to the character-vector overload of trimws.
    CharacterVector trimmed = trimws(x, which);
    return trimmed;
}
1212+
1213+
// [[Rcpp::export]]
1214+
CharacterMatrix mtrimws(CharacterMatrix x, const char* which = "both") {
    // Test shim: forwards to the character-matrix overload of trimws.
    CharacterMatrix trimmed = trimws(x, which);
    return trimmed;
}
1217+
1218+
// [[Rcpp::export]]
1219+
String strimws(String x, const char* which = "both") {
    // Test shim: forwards to the scalar String overload of trimws.
    String trimmed = trimws(x, which);
    return trimmed;
}

inst/unitTests/runit.sugar.R

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2048,4 +2048,135 @@ if (.runThisTest) {
20482048

20492049
}
20502050

2051+
2052+
## 22 April 2017
2053+
## trimws -- vector
2054+
test.sugar.vtrimws <- function() {

    ## Leading, trailing and two-sided padding for each whitespace
    ## character, followed by NA, "" and whitespace-only strings.
    input <- c(
        " a b c", "a b c ", " a b c ",
        "\t\ta b c", "a b c\t\t", "\t\ta b c\t\t",
        "\r\ra b c", "a b c\r\r", "\r\ra b c\r\r",
        "\n\na b c", "a b c\n\n", "\n\na b c\n\n",
        NA, "", " ", " \t\r\n ", "\n \t \r "
    )

    ## The exported sugar wrapper must agree with R's trimws in every mode.
    checkEquals(vtrimws(input), trimws(input),
                msg = "vtrimws / which = 'both'")

    checkEquals(vtrimws(input, 'l'), trimws(input, 'l'),
                msg = "vtrimws / which = 'left'")

    checkEquals(vtrimws(input, 'r'), trimws(input, 'r'),
                msg = "vtrimws / which = 'right'")

    ## An unrecognised `which` must raise an error.
    checkException(vtrimws(input, "invalid"),
                   msg = "vtrimws -- bad `which` argument")

}
2085+
2086+
2087+
## trimws -- matrix
2088+
test.sugar.mtrimws <- function() {

    ## Leading, trailing and two-sided padding for each whitespace
    ## character, followed by NA, "" and whitespace-only strings.
    input <- c(
        " a b c", "a b c ", " a b c ",
        "\t\ta b c", "a b c\t\t", "\t\ta b c\t\t",
        "\r\ra b c", "a b c\r\r", "\r\ra b c\r\r",
        "\n\na b c", "a b c\n\n", "\n\na b c\n\n",
        NA, "", " ", " \t\r\n ", "\n \t \r "
    )

    ## Repeat the inputs across four columns to exercise the matrix overload.
    m <- matrix(input, nrow = length(input), ncol = 4)

    ## The exported sugar wrapper must agree with R's trimws in every mode.
    checkEquals(mtrimws(m), trimws(m),
                msg = "mtrimws / which = 'both'")

    checkEquals(mtrimws(m, 'l'), trimws(m, 'l'),
                msg = "mtrimws / which = 'left'")

    checkEquals(mtrimws(m, 'r'), trimws(m, 'r'),
                msg = "mtrimws / which = 'right'")

    ## An unrecognised `which` must raise an error.
    checkException(mtrimws(m, "invalid"),
                   msg = "mtrimws -- bad `which` argument")

}
2120+
2121+
2122+
## trimws -- String
2123+
test.sugar.strimws <- function() {

    ## Leading, trailing and two-sided padding for each whitespace
    ## character, followed by NA, "" and whitespace-only strings.
    input <- c(
        " a b c", "a b c ", " a b c ",
        "\t\ta b c", "a b c\t\t", "\t\ta b c\t\t",
        "\r\ra b c", "a b c\r\r", "\r\ra b c\r\r",
        "\n\na b c", "a b c\n\n", "\n\na b c\n\n",
        NA, "", " ", " \t\r\n ", "\n \t \r "
    )

    ## strimws handles one string at a time, so apply the function under
    ## test and the reference to each element in turn. Extra arguments
    ## (e.g. `which`) are forwarded to `f`.
    each <- function(f, ...) {
        vapply(input, f, character(1), ..., USE.NAMES = FALSE)
    }

    got <- each(strimws)
    expected <- each(trimws)
    checkEquals(got, expected,
                msg = "strimws / which = 'both'")

    got <- each(strimws, which = 'l')
    expected <- each(trimws, which = 'l')
    checkEquals(got, expected,
                msg = "strimws / which = 'left'")

    got <- each(strimws, which = 'r')
    expected <- each(trimws, which = 'r')
    checkEquals(got, expected,
                msg = "strimws / which = 'right'")

    ## An unrecognised `which` must raise an error.
    checkException(strimws(input[1], "invalid"),
                   msg = "strimws -- bad `which` argument")

}
2181+
20512182
}

0 commit comments

Comments
 (0)