-
Notifications
You must be signed in to change notification settings - Fork 1k
Expand file tree
/
Copy pathdata.table.h
More file actions
250 lines (210 loc) · 9.59 KB
/
data.table.h
File metadata and controls
250 lines (210 loc) · 9.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#include "dt_stdio.h" // PRId64 and PRIu64
#include <R.h>
#include <Rversion.h>
// Compatibility shims, active only when R_VERSION is undefined or older than R 3.5.0.
// This block sits ahead of the <Rinternals.h> include that follows it, so USE_RINTERNALS is
// already defined when that header is read.
#if !defined(R_VERSION) || R_VERSION < R_Version(3, 5, 0) // R-exts$6.14
// ALTREP(x) is forced to the constant 0 here, so every ALTREP test is false on these versions.
# define ALTREP(x) 0 // #2866
# define USE_RINTERNALS // #3301
// Read-only data pointer built by casting the writable DATAPTR(x) to const void *.
# define DATAPTR_RO(x) ((const void *)DATAPTR(x))
#endif
#include <Rinternals.h>
// Views the data of x as a read-only array of SEXP (const SEXP *) by casting DATAPTR_RO(x).
// NOTE(review): presumably only meaningful for STRSXP / VECSXP inputs, given the STRING_ELT / VECTOR_ELT remark - confirm at call sites.
#define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT
#include <stdint.h> // for uint64_t rather than unsigned long long
#include <stdbool.h>
#include "myomp.h"
#include "types.h"
#include "po.h"
// #include <signal.h> // the debugging machinery + breakpoint aidee
// raise(SIGINT);
// RLEN is an alias for R_xlen_t. data.table depends on R>=3.0.0, which is when R_xlen_t was introduced;
// before R 3.0.0, RLEN used to be switched to R_len_t because R_xlen_t wasn't available.
// All uses of RLEN could now be replaced with R_xlen_t directly. The alias is kept for the shorter
// name, and so that reading the code does not depend on spotting the one-letter difference
// R_xlen_t/R_len_t. We might also undefine R_len_t to ensure it is not used; note that several
// prototypes further down in this header still take R_len_t.
typedef R_xlen_t RLEN;
// Encoding flag tests: each masks a single bit out of LEVELS(x) and yields non-zero when that bit is set
// (the result is the masked bit value, not necessarily 1).
// NOTE(review): the literals 8, 64 and 4 look like copies of R's private UTF8_MASK, ASCII_MASK and LATIN1_MASK
// (src/include/Defn.h), which R does not export - re-check against the R sources if the flag layout ever changes.
#define IS_UTF8(x) (LEVELS(x) & 8)
#define IS_ASCII(x) (LEVELS(x) & 64)
#define IS_LATIN(x) (LEVELS(x) & 4)
// Scalar-logical predicates. Each requires x to be a logical vector (LGLSXP) of length exactly 1, then:
//   IS_TRUE          - its single element equals TRUE
//   IS_FALSE         - its single element equals FALSE
//   IS_TRUE_OR_FALSE - its single element is not NA_LOGICAL
// The type and length checks come first, so LOGICAL(x)[0] is only read once they have passed.
// x is expanded three times; pass a plain variable rather than an expression with side effects.
#define IS_TRUE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==TRUE)
#define IS_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==FALSE)
#define IS_TRUE_OR_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]!=NA_LOGICAL)
// Table lookups into the extern arrays sizes[] and typeorder[] declared further down in this header
// (both declared with 100 entries):
//   SIZEOF(x)    - sizes[] indexed by TYPEOF(x), so x is a SEXP
//   TYPEORDER(x) - typeorder[] indexed by x itself (presumably a SEXPTYPE value - confirm at call sites)
// Neither macro range-checks its index.
#define SIZEOF(x) sizes[TYPEOF(x)]
#define TYPEORDER(x) typeorder[x]
// Smaller of a and b. Any definition of MIN already in effect is dropped first so that this one
// is always the version used. Being a function-like macro, it evaluates the selected argument
// twice: never pass expressions with side effects (e.g. MIN(i++, j)).
#undef MIN  // harmless when MIN was not defined beforehand
#define MIN(a, b) ((a) < (b) ? (a) : (b))
// Larger of a and b. Any definition of MAX already in effect is dropped first so that this one
// is always the version used. Being a function-like macro, it evaluates the selected argument
// twice: never pass expressions with side effects (e.g. MAX(i++, j)).
#undef MAX  // harmless when MAX was not defined beforehand
#define MAX(a, b) ((a) > (b) ? (a) : (b))
// for use with bit64::integer64
// The NA value is the smallest int64_t (INT64_MIN); MAX_INTEGER64 is INT64_MAX.
// Both limits come from <stdint.h>, which this header includes.
#define NA_INTEGER64 INT64_MIN
#define MAX_INTEGER64 INT64_MAX
// for use with CPLXSXP, no macro provided by R internals
// x is accessed with .r / .i, so it must be a complex value (not a pointer to one). x is expanded
// twice; pass a plain variable or element, not an expression with side effects.
#define ISNAN_COMPLEX(x) (ISNAN((x).r) || ISNAN((x).i)) // TRUE if either real or imaginary component is NA or NaN
// Backport macros added to R in 2017 so we don't need to update dependency from R 3.0.0
// Each fallback is defined only when the macro is not already defined (#ifndef), and is written in
// terms of NAMED(x):
//   MAYBE_SHARED(x)     - NAMED(x) > 1
//   MAYBE_REFERENCED(x) - NAMED(x) > 0
#ifndef MAYBE_SHARED
# define MAYBE_SHARED(x) (NAMED(x) > 1)
#endif
#ifndef MAYBE_REFERENCED
# define MAYBE_REFERENCED(x) ( NAMED(x) > 0 )
#endif
// If we find a non-ASCII, non-NA, non-UTF8 encoding, we try to convert it to UTF8. That is, marked non-ascii/non-UTF8 encodings will
// always be checked in UTF8 locale. This seems to be the best fix Arun could think of to put the encoding issues to rest.
// Since the if-statement will fail with the first condition check in "normal" ASCII cases, there shouldn't be huge penalty issues in
// most cases. Fix for #66, #69, #469 and #1293
// TODO: compare 1.9.6 performance with 1.9.7 with huge number of ASCII strings, and again after Jan 2018 when made macro.
// Matt moved this to be macro in Jan 2018 so that branch can benefit from branch prediction too wherever used inside loops.
// This IS_ASCII will dereference s and that cache fetch is the part that may bite more than the branch, though. Without a call
// to ENC2UTF8 at all, the pointer value can just be compared by the calling code without dereferencing it. It may still be worth
// timing the impact and manually avoiding (is there an IS_ASCII on the character vector rather than testing each item every time?)
// NEED2UTF8(s): 1 when s has to be translated to UTF-8, i.e. it is not flagged ASCII, is not NA_STRING and is not
//   already flagged UTF-8; 0 otherwise. The ASCII test stays first so the usual all-ASCII case short-circuits at once.
// ENC2UTF8(s): s itself when no translation is needed, otherwise the result of mkCharCE(translateCharUTF8(s), CE_UTF8).
// Both macros expand s several times; pass a plain variable, never an expression with side effects.
#define NEED2UTF8(s) (!IS_ASCII(s) && (s)!=NA_STRING && !IS_UTF8(s))
#define ENC2UTF8(s) (NEED2UTF8(s) ? mkCharCE(translateCharUTF8(s), CE_UTF8) : (s))
// init.c
extern SEXP char_integer64;
extern SEXP char_ITime;
extern SEXP char_IDate;
extern SEXP char_Date;
extern SEXP char_POSIXct;
extern SEXP char_nanotime;
extern SEXP char_lens;
extern SEXP char_indices;
extern SEXP char_allLen1;
extern SEXP char_allGrp1;
extern SEXP char_factor;
extern SEXP char_ordered;
extern SEXP char_datatable;
extern SEXP char_dataframe;
extern SEXP char_NULL;
extern SEXP sym_sorted;
extern SEXP sym_index;
extern SEXP sym_BY;
extern SEXP sym_starts, char_starts;
extern SEXP sym_maxgrpn;
extern SEXP sym_colClassesAs;
extern SEXP sym_verbose;
extern SEXP SelfRefSymbol;
extern SEXP sym_inherits;
extern SEXP sym_datatable_locked;
extern double NA_INT64_D;
extern long long NA_INT64_LL;
extern Rcomplex NA_CPLX; // initialized in init.c; see there for comments
extern size_t sizes[100]; // max appears to be FUNSXP = 99, see Rinternals.h
extern size_t typeorder[100];
long long DtoLL(double x);
double LLtoD(long long x);
// Explicit (void): before C23 an empty () declares no prototype, so arguments passed by mistake would go unchecked.
bool GetVerbose(void);
// cj.c
SEXP cj(SEXP base_list);
// dogroups.c
SEXP keepattr(SEXP to, SEXP from);
SEXP growVector(SEXP x, R_len_t newlen);
// assign.c
SEXP allocNAVector(SEXPTYPE type, R_len_t n);
SEXP allocNAVectorLike(SEXP x, R_len_t n);
void writeNA(SEXP v, const int from, const int n);
// One declaration per line, and explicit (void) on the two argument-less functions: before C23 an
// empty () declares no prototype, so arguments passed by mistake would go unchecked.
void savetl_init(void);
void savetl(SEXP s);
void savetl_end(void);
int checkOverAlloc(SEXP x);
// forder.c
int StrCmp(SEXP x, SEXP y);
uint64_t dtwiddle(void *p, int i);
SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg);
// Explicit (void): before C23 an empty () declares no prototype, so arguments passed by mistake would go unchecked.
int getNumericRounding_C(void);
// reorder.c
SEXP reorder(SEXP x, SEXP order);
SEXP setcolorder(SEXP x, SEXP o);
// subset.c
void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA);
SEXP subsetVector(SEXP x, SEXP idx);
// fcast.c
SEXP int_vec_init(R_len_t n, int val);
// vecseq.c
SEXP vecseq(SEXP x, SEXP len, SEXP clamp);
// uniqlist.c
SEXP uniqlist(SEXP l, SEXP order);
SEXP uniqlengths(SEXP x, SEXP n);
// chmatch.c
SEXP chmatch(SEXP x, SEXP table, int nomatch);
SEXP chin(SEXP x, SEXP table);
SEXP isOrderedSubset(SEXP, SEXP);
void setselfref(SEXP);
// fmelt.c
SEXP seq_int(int n, int start);
SEXP set_diff(SEXP x, int n);
SEXP which(SEXP x, Rboolean val);
// frank.c
SEXP dt_na(SEXP x, SEXP cols);
// assign.c
SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose);
const char *memrecycle(const SEXP target, const SEXP where, const int r, const int len, SEXP source, const int sourceStart, const int sourceLen, const int coln, const char *colname);
SEXP shallowwrapper(SEXP dt, SEXP cols);
SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols,
SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts,
SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames,
SEXP on, SEXP verbose);
// bmerge.c
SEXP bmerge(SEXP iArg, SEXP xArg, SEXP icolsArg, SEXP xcolsArg, SEXP isorted,
SEXP xoArg, SEXP rollarg, SEXP rollendsArg, SEXP nomatchArg,
SEXP multArg, SEXP opArg, SEXP nqgrpArg, SEXP nqmaxgrpArg);
// quickselect
double dquickselect(double *x, int n);
double iquickselect(int *x, int n);
double i64quickselect(int64_t *x, int n);
// fread.c
// Explicit (void): before C23 an empty () declares no prototype, so arguments passed by mistake would go unchecked.
double wallclock(void);
// openmp-utils.c
// All three take no arguments. They are declared with an explicit (void) because, before C23, an
// empty () declares no prototype and arguments passed by mistake would go unchecked.
void initDTthreads(void);
int getDTthreads(void);
void avoid_openmp_hang_within_fork(void);
// froll.c
void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
void frollmeanFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose);
// frolladaptive.c
void fadaptiverollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
// frollR.c
SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive);
SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho);
// nafill.c
void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose);
void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill, ans_t *ans, bool verbose);
SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, SEXP cols);
// between.c
SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds, SEXP check);
// coalesce.c
SEXP coalesce(SEXP x, SEXP inplace);
// utils.c
bool isRealReallyInt(SEXP x);
SEXP isReallyReal(SEXP x);
bool allNA(SEXP x, bool errorForBadType);
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups);
void coerceFill(SEXP fill, double *dfill, int32_t *ifill, int64_t *i64fill);
SEXP coerceFillR(SEXP fill);
bool INHERITS(SEXP x, SEXP char_);
bool Rinherits(SEXP x, SEXP char_);
SEXP copyAsPlain(SEXP x);
void copySharedColumns(SEXP x);
SEXP lock(SEXP x);
SEXP unlock(SEXP x);
bool islocked(SEXP x);
SEXP islockedR(SEXP x);
bool need2utf8(SEXP x);
SEXP coerceUtf8IfNeeded(SEXP x);
// types.c
char *end(char *start);
void ansMsg(ans_t *ans, int n, bool verbose, const char *func);
SEXP testMsgR(SEXP status, SEXP x, SEXP k);
// fifelse.c
SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na);
SEXP fcaseR(SEXP na, SEXP rho, SEXP args);
// mergelist.c
SEXP cbindlist(SEXP x, SEXP copyArg);