Skip to content

Commit fcbfcc6

Browse files
committed
Merge pull request #425 from nathan-russell/master
Sugar median with unit tests
2 parents 05b11bf + 00e8737 commit fcbfcc6

File tree

4 files changed

+438
-0
lines changed

4 files changed

+438
-0
lines changed

inst/include/Rcpp/sugar/functions/functions.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,5 +80,7 @@
8080
#include <Rcpp/sugar/functions/cummin.h>
8181
#include <Rcpp/sugar/functions/cummax.h>
8282

83+
#include <Rcpp/sugar/functions/median.h>
84+
8385
#endif
8486

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
2+
//
3+
// median.h: Rcpp R/C++ interface class library -- median
4+
//
5+
// Copyright (C) 2016 Dirk Eddelbuettel, Romain Francois, and Nathan Russell
6+
//
7+
// This file is part of Rcpp.
8+
//
9+
// Rcpp is free software: you can redistribute it and/or modify it
10+
// under the terms of the GNU General Public License as published by
11+
// the Free Software Foundation, either version 2 of the License, or
12+
// (at your option) any later version.
13+
//
14+
// Rcpp is distributed in the hope that it will be useful, but
15+
// WITHOUT ANY WARRANTY; without even the implied warranty of
16+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+
// GNU General Public License for more details.
18+
//
19+
// You should have received a copy of the GNU General Public License
20+
// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
21+
22+
#ifndef Rcpp__sugar__median_h
23+
#define Rcpp__sugar__median_h
24+
25+
namespace Rcpp {
26+
namespace sugar {
27+
namespace median_detail {
28+
29+
// need to return double for integer vectors
30+
// (in case of even-length input vector)
31+
// and Rcpp::String for STRSXP
32+
// also need to return NA_REAL for
33+
// integer vector yielding NA result
34+
template <int RTYPE>
35+
struct result {
36+
typedef typename Rcpp::traits::storage_type<RTYPE>::type type;
37+
enum { rtype = RTYPE };
38+
};
39+
40+
template <>
41+
struct result<INTSXP> {
42+
typedef double type;
43+
enum { rtype = REALSXP };
44+
};
45+
46+
template <>
47+
struct result<STRSXP> {
48+
typedef Rcpp::String type;
49+
enum { rtype = STRSXP };
50+
};
51+
52+
// std::nth_element and std::max_element don't
53+
// know how to compare Rcomplex values
54+
template <typename T>
55+
inline bool less(T lhs, T rhs) {
56+
return lhs < rhs;
57+
}
58+
59+
template<>
60+
inline bool less<Rcomplex>(Rcomplex lhs, Rcomplex rhs) {
61+
if (lhs.r < rhs.r) return true;
62+
if (lhs.i < rhs.i) return true;
63+
return false;
64+
}
65+
66+
// compiler does not know how to handle
67+
// Rcomplex numerator / double denominator
68+
// and need explicit cast for INTSXP case
69+
inline double half(double lhs) {
70+
return lhs / 2.0;
71+
}
72+
73+
inline double half(int lhs) {
74+
return static_cast<double>(lhs) / 2.0;
75+
}
76+
77+
inline Rcomplex half(Rcomplex lhs) {
78+
lhs.r /= 2.0;
79+
lhs.i /= 2.0;
80+
return lhs;
81+
}
82+
83+
} // median_detail
84+
85+
// base case
86+
template <int RTYPE, bool NA, typename T, bool NA_RM = false>
87+
class Median {
88+
public:
89+
typedef typename median_detail::result<RTYPE>::type result_type;
90+
typedef typename Rcpp::traits::storage_type<RTYPE>::type stored_type;
91+
enum { RESULT_RTYPE = median_detail::result<RTYPE>::rtype };
92+
typedef T VECTOR;
93+
94+
private:
95+
VECTOR x;
96+
97+
public:
98+
Median(const VECTOR& xx)
99+
: x(Rcpp::clone(xx)) {}
100+
101+
operator result_type() {
102+
if (x.size() < 1) {
103+
return Rcpp::traits::get_na<RESULT_RTYPE>();
104+
}
105+
106+
if (Rcpp::any(Rcpp::is_na(x))) {
107+
return Rcpp::traits::get_na<RESULT_RTYPE>();
108+
}
109+
110+
R_xlen_t n = x.size() / 2;
111+
std::nth_element(
112+
x.begin(), x.begin() + n, x.end(),
113+
median_detail::less<stored_type>);
114+
115+
if (x.size() % 2) return x[n];
116+
return median_detail::half(
117+
x[n] + *std::max_element(
118+
x.begin(), x.begin() + n,
119+
median_detail::less<stored_type>));
120+
}
121+
};
122+
123+
// na.rm = TRUE
124+
template <int RTYPE, bool NA, typename T>
125+
class Median<RTYPE, NA, T, true> {
126+
public:
127+
typedef typename median_detail::result<RTYPE>::type result_type;
128+
typedef typename Rcpp::traits::storage_type<RTYPE>::type stored_type;
129+
enum { RESULT_RTYPE = median_detail::result<RTYPE>::rtype };
130+
typedef T VECTOR;
131+
132+
private:
133+
VECTOR x;
134+
135+
public:
136+
Median(const VECTOR& xx)
137+
: x(Rcpp::na_omit(Rcpp::clone(xx))) {}
138+
139+
operator result_type() {
140+
if (!x.size()) {
141+
return Rcpp::traits::get_na<RESULT_RTYPE>();
142+
}
143+
144+
R_xlen_t n = x.size() / 2;
145+
std::nth_element(
146+
x.begin(), x.begin() + n, x.end(),
147+
median_detail::less<stored_type>);
148+
149+
if (x.size() % 2) return x[n];
150+
return median_detail::half(
151+
x[n] + *std::max_element(
152+
x.begin(), x.begin() + n,
153+
median_detail::less<stored_type>));
154+
}
155+
};
156+
157+
// NA = false
158+
template <int RTYPE, typename T, bool NA_RM>
159+
class Median<RTYPE, false, T, NA_RM> {
160+
public:
161+
typedef typename median_detail::result<RTYPE>::type result_type;
162+
typedef typename Rcpp::traits::storage_type<RTYPE>::type stored_type;
163+
enum { RESULT_RTYPE = median_detail::result<RTYPE>::rtype };
164+
typedef T VECTOR;
165+
166+
private:
167+
VECTOR x;
168+
169+
public:
170+
Median(const VECTOR& xx)
171+
: x(Rcpp::clone(xx)) {}
172+
173+
operator result_type() {
174+
if (x.size() < 1) {
175+
return Rcpp::traits::get_na<RESULT_RTYPE>();
176+
}
177+
178+
R_xlen_t n = x.size() / 2;
179+
std::nth_element(
180+
x.begin(), x.begin() + n, x.end(),
181+
median_detail::less<stored_type>);
182+
183+
if (x.size() % 2) return x[n];
184+
return median_detail::half(
185+
x[n] + *std::max_element(
186+
x.begin(), x.begin() + n,
187+
median_detail::less<stored_type>));
188+
}
189+
};
190+
191+
// specialize for character vector
192+
// due to string_proxy's incompatibility
193+
// with certain std:: algorithms;
194+
// need to return NA for even-length vectors
195+
template <bool NA, typename T, bool NA_RM>
196+
class Median<STRSXP, NA, T, NA_RM> {
197+
public:
198+
typedef typename median_detail::result<STRSXP>::type result_type;
199+
typedef typename Rcpp::traits::storage_type<STRSXP>::type stored_type;
200+
typedef T VECTOR;
201+
202+
private:
203+
VECTOR x;
204+
205+
public:
206+
Median(const VECTOR& xx)
207+
: x(Rcpp::clone(xx)) {}
208+
209+
operator result_type() {
210+
if (!(x.size() % 2)) {
211+
return Rcpp::traits::get_na<STRSXP>();
212+
}
213+
214+
if (Rcpp::any(Rcpp::is_na(x))) {
215+
return Rcpp::traits::get_na<STRSXP>();
216+
}
217+
218+
R_xlen_t n = x.size() / 2;
219+
x.sort();
220+
221+
return x[n];
222+
}
223+
};
224+
225+
// na.rm = TRUE
226+
template <bool NA, typename T>
227+
class Median<STRSXP, NA, T, true> {
228+
public:
229+
typedef typename median_detail::result<STRSXP>::type result_type;
230+
typedef typename Rcpp::traits::storage_type<STRSXP>::type stored_type;
231+
typedef T VECTOR;
232+
233+
private:
234+
VECTOR x;
235+
236+
public:
237+
Median(const VECTOR& xx)
238+
: x(Rcpp::na_omit(Rcpp::clone(xx))) {}
239+
240+
operator result_type() {
241+
if (!(x.size() % 2)) {
242+
return Rcpp::traits::get_na<STRSXP>();
243+
}
244+
245+
R_xlen_t n = x.size() / 2;
246+
x.sort();
247+
248+
return x[n];
249+
}
250+
};
251+
252+
// NA = false
253+
template <typename T>
254+
class Median<STRSXP, false, T, true> {
255+
public:
256+
typedef typename median_detail::result<STRSXP>::type result_type;
257+
typedef typename Rcpp::traits::storage_type<STRSXP>::type stored_type;
258+
typedef T VECTOR;
259+
260+
private:
261+
VECTOR x;
262+
263+
public:
264+
Median(const VECTOR& xx)
265+
: x(Rcpp::clone(xx)) {}
266+
267+
operator result_type() {
268+
if (!(x.size() % 2)) {
269+
return Rcpp::traits::get_na<STRSXP>();
270+
}
271+
272+
R_xlen_t n = x.size() / 2;
273+
x.sort();
274+
275+
return x[n];
276+
}
277+
};
278+
279+
} // sugar
280+
281+
template <int RTYPE, bool NA, typename T>
282+
inline typename sugar::median_detail::result<RTYPE>::type
283+
median(const Rcpp::VectorBase<RTYPE, NA, T>& x, bool na_rm = false) {
284+
if (!na_rm) return sugar::Median<RTYPE, NA, T, false>(x);
285+
return sugar::Median<RTYPE, NA, T, true>(x);
286+
}
287+
288+
} // Rcpp
289+
290+
#endif // Rcpp__sugar__median_h

inst/unitTests/cpp/sugar.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,3 +716,25 @@ IntegerVector runit_cummax_iv(IntegerVector xx){
716716
}
717717

718718

719+
// 18 January 2016: median
720+
721+
// [[Rcpp::export]]
722+
double median_dbl(Rcpp::NumericVector x, bool na_rm = false) {
723+
return Rcpp::median(x, na_rm);
724+
}
725+
726+
// [[Rcpp::export]]
727+
double median_int(Rcpp::IntegerVector x, bool na_rm = false) {
728+
return Rcpp::median(x, na_rm);
729+
}
730+
731+
// [[Rcpp::export]]
732+
Rcomplex median_cx(Rcpp::ComplexVector x, bool na_rm = false) {
733+
return Rcpp::median(x, na_rm);
734+
}
735+
736+
// [[Rcpp::export]]
737+
Rcpp::String median_ch(Rcpp::CharacterVector x, bool na_rm = false) {
738+
return Rcpp::median(x, na_rm);
739+
}
740+

0 commit comments

Comments
 (0)