Skip to content

Commit c3ba954

Browse files
Merge pull request #6018 from Anirban166/openmp
Documented the use of OpenMP and parallelization in C code
2 parents 3305e5b + a940c46 commit c3ba954

File tree

16 files changed

+98
-5
lines changed

16 files changed

+98
-5
lines changed

CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,6 @@
6060
# performance testing
6161
/.ci/atime/tests.R @tdhock @Anirban166
6262
/.github/workflows/performance-tests.yaml @Anirban166
63+
64+
# docs
65+
/man/openmp-utils.Rd @Anirban166

man/openmp-utils.Rd

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,18 @@
4141
\item\file{cj.c} - \code{\link{CJ}()}
4242
\item\file{coalesce.c} - \code{\link{fcoalesce}()}
4343
\item\file{fifelse.c} - \code{\link{fifelse}()}
44-
\item\file{fread.c} - \code{\link{fread}()}
44+
\item\file{fread.c} - \code{\link{fread}(). Parallelized across row-based chunks of the file.}
4545
\item\file{forder.c}, \file{fsort.c}, and \file{reorder.c} - \code{\link{forder}()} and related
4646
\item\file{froll.c}, \file{frolladaptive.c}, and \file{frollR.c} - \code{\link{froll}()} and family
47-
\item\file{fwrite.c} - \code{\link{fwrite}()}
48-
\item\file{gsumm.c} - GForce in various places, see \link{GForce}
47+
\item\file{fwrite.c} - \code{\link{fwrite}(). Parallelized across rows.}
48+
\item\file{gsumm.c} - GForce in various places, see \link{GForce}. Parallelized across groups.
4949
\item\file{nafill.c} - \code{\link{nafill}()}
5050
\item\file{subset.c} - Used in \code{\link[=data.table]{[.data.table}} subsetting
5151
\item\file{types.c} - Internal testing usage
5252
}
53+
54+
We endeavor to keep this list up to date, but note that the canonical reference here is the source code itself.
5355
}
5456
\examples{
5557
getDTthreads(verbose=TRUE)
5658
}
57-
\keyword{ data }

src/between.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
#include "data.table.h"
22

3+
/*
4+
OpenMP is used here to parallelize:
5+
- The loops that check if each element of the vector provided is between
6+
the specified lower and upper bounds, for INTSXP and REALSXP types
7+
- The checking and handling of undefined values (such as NaNs)
8+
*/
39
SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAboundsArg, SEXP checkArg) {
410
int nprotect = 0;
511
R_len_t nx = length(x), nl = length(lower), nu = length(upper);

src/cj.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
#include "data.table.h"
22

3+
/*
4+
OpenMP is used here to parallelize:
5+
- The element assignment in vectors
6+
- The memory copying operations (blockwise replication of data using memcpy)
7+
- The creation of all combinations of the input vectors over the cross-product space
8+
*/
39
SEXP cj(SEXP base_list) {
410
int ncol = LENGTH(base_list);
511
SEXP out = PROTECT(allocVector(VECSXP, ncol));

src/coalesce.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
#include "data.table.h"
22

3+
/*
4+
OpenMP is used here to parallelize:
5+
- The operation that iterates over the rows to coalesce the data
6+
- The replacement of NAs with non-NA values from subsequent vectors
7+
- The conditional checks within parallelized loops
8+
*/
39
SEXP coalesce(SEXP x, SEXP inplaceArg) {
410
if (TYPEOF(x)!=VECSXP) internal_error(__func__, "input is list(...) at R level"); // # nocov
511
if (!IS_TRUE_OR_FALSE(inplaceArg)) internal_error(__func__, "argument 'inplaceArg' must be TRUE or FALSE"); // # nocov

src/fifelse.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
#include "data.table.h"
22

3+
/*
4+
OpenMP is being used here to parallelize loops that perform conditional
5+
checks along with assignment operations over the elements of the
6+
supplied logical vector based on the condition (test) and values
7+
provided for the remaining arguments (yes, no, and na).
8+
*/
39
SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na) {
410
if (!isLogical(l)) {
511
error(_("Argument 'test' must be logical."));

src/forder.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,19 @@ uint64_t dtwiddle(double x) //const void *p, int i)
433433

434434
void radix_r(const int from, const int to, const int radix);
435435

436+
/*
437+
OpenMP is used here to parallelize multiple operations that come together to
438+
sort a data.table using the Radix algorithm. These include:
439+
440+
- The counting of unique values and recursively sorting subsets of data
441+
across different threads
442+
- The process of finding the range and distribution of data for efficient
443+
grouping and sorting
444+
- Creation of histograms which are used to sort data based on significant
445+
bits (each thread processes a separate batch of the data, computes the
446+
MSB of each element, and then increments the corresponding bins), with
447+
the distribution and merging of buckets
448+
*/
436449
SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsArg, SEXP ascArg, SEXP naArg)
437450
// sortGroups TRUE from setkey and regular forder, FALSE from by= for efficiency so strings don't have to be sorted and can be left in appearance order
438451
// when sortGroups is TRUE, ascArg contains +1/-1 for ascending/descending of each by column; when FALSE ascArg is ignored

src/fread.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,14 @@ static int detect_types( const char **pch, int8_t type[], int ncol, bool *bumped
12681268
//
12691269
// Returns 1 if it finishes successfully, and 0 otherwise.
12701270
//
1271+
// OpenMP is used here to:
1272+
// - Parallelize the reading of data in chunks
1273+
// - Avoid race conditions or concurrent writes to the output data.table by having atomic
1274+
// operations on the string data
1275+
// - Manage synchronized updates to the progress bar and serialize the output to the console
1276+
// This function is highly optimized in reading and processing data with both large numbers of
1277+
// rows and columns, but the efficiency is more pronounced across rows.
1278+
//
12711279
//=================================================================================================
12721280
int freadMain(freadMainArgs _args) {
12731281
args = _args; // assign to global for use by DTPRINT() in other functions

src/froll.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
#include "data.table.h"
22

3+
/*
4+
OpenMP is used here to parallelize the loops in frollmean and frollsum.
5+
6+
These functions benefit more in terms of speedup when the data has a large
7+
number of columns, primarily due to the efficient memory access patterns
8+
(cache-friendly) used when processing the data for each column
9+
sequentially in memory to compute the rolling statistic.
10+
*/
11+
312
/* fast rolling mean - router
413
* early stopping for window bigger than input
514
* also handles 'align' in single place

src/fsort.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ int qsort_cmp(const void *a, const void *b) {
9898
return (x<y)-(x>y); // largest first in a safe branchless way casting long to int
9999
}
100100

101+
/*
102+
OpenMP is used here to find the range and distribution of data for efficient
103+
grouping and sorting.
104+
*/
101105
SEXP fsort(SEXP x, SEXP verboseArg) {
102106
double t[10];
103107
t[0] = wallclock();

0 commit comments

Comments
 (0)