Skip to content

Commit 6d44906

Browse files
committed
Switch the order of gs and ls to conform to what the underlying APIs use.
1 parent 61fe479 commit 6d44906

13 files changed

+35
-35
lines changed

src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ set_target_properties(gpuarray PROPERTIES
8888
INSTALL_NAME_DIR ${CMAKE_INSTALL_PREFIX}/lib
8989
MACOSX_RPATH OFF
9090
# This is the shared library version
91-
VERSION 1.0
91+
VERSION 2.0
9292
)
9393

9494
add_library(gpuarray-static STATIC ${GPUARRAY_SRC})

src/gpuarray/buffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -482,15 +482,15 @@ GPUARRAY_PUBLIC int gpukernel_setarg(gpukernel *k, unsigned int i, void *a);
482482
*
483483
* \param k kernel
484484
* \param n number of dimensions of grid/block
485-
* \param bs block sizes for this call (also known as local size)
486485
* \param gs grid sizes for this call (also known as global size)
486+
* \param ls block sizes for this call (also known as local size)
487487
* \param shared amount of dynamic shared memory to reserve
488488
* \param args table of pointers to each argument (optional).
489489
*
490490
* \returns GA_NO_ERROR or an error code if an error occurred.
491491
*/
492492
GPUARRAY_PUBLIC int gpukernel_call(gpukernel *k, unsigned int n,
493-
const size_t *ls, const size_t *gs,
493+
const size_t *gs, const size_t *ls,
494494
size_t shared, void **args);
495495

496496
/**

src/gpuarray/config.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
/* The following included file should have been generated by CMake. */
55
#include <gpuarray/abi_version.h>
6-
#define GPUARRAY_API_VERSION 0
6+
#define GPUARRAY_API_VERSION 1
77

88
#ifdef GPUARRAY_SHARED
99
#ifdef _WIN32

src/gpuarray/kernel.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,24 +87,24 @@ GPUARRAY_PUBLIC int GpuKernel_setarg(GpuKernel *k, unsigned int i, void *val);
8787
*
8888
* \param k the kernel to schedule for
8989
* \param n number of elements to handle
90-
* \param ls local size (in/out)
9190
* \param gs grid size (in/out)
91+
* \param ls local size (in/out)
9292
*/
9393
GPUARRAY_PUBLIC int GpuKernel_sched(GpuKernel *k, size_t n,
94-
size_t *ls, size_t *gs);
94+
size_t *gs, size_t *ls);
9595

9696
/**
9797
* Launch the execution of a kernel.
9898
*
9999
* \param k the kernel to launch
100100
* \param n dimensionality of the grid/blocks
101-
* \param ls sizes of launch blocks
102101
* \param gs sizes of launch grid
102+
* \param ls sizes of launch blocks
103103
* \param shared amount of dynamic shared memory to allocate
104104
* \param args table of pointers to arguments
105105
*/
106106
GPUARRAY_PUBLIC int GpuKernel_call(GpuKernel *k, unsigned int n,
107-
const size_t *ls, const size_t *gs,
107+
const size_t *gs, const size_t *ls,
108108
size_t shared, void **args);
109109

110110
GPUARRAY_PUBLIC int GpuKernel_binary(const GpuKernel *k, size_t *sz,

src/gpuarray_array.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ int GpuArray_take1(GpuArray *a, const GpuArray *v, const GpuArray *i,
487487
if (err != GA_NO_ERROR)
488488
return err;
489489

490-
err = GpuKernel_sched(&k, n[0]*n[1], &ls[1], &gs[1]);
490+
err = GpuKernel_sched(&k, n[0]*n[1], &gs[1], &ls[1]);
491491
if (err != GA_NO_ERROR)
492492
goto out;
493493

@@ -521,7 +521,7 @@ int GpuArray_take1(GpuArray *a, const GpuArray *v, const GpuArray *i,
521521
GpuKernel_setarg(&k, argp++, &n[1]);
522522
GpuKernel_setarg(&k, argp++, errbuf);
523523

524-
err = GpuKernel_call(&k, 2, ls, gs, 0, NULL);
524+
err = GpuKernel_call(&k, 2, gs, ls, 0, NULL);
525525
if (check_error && err == GA_NO_ERROR) {
526526
err = gpudata_read(&kerr, errbuf, 0, sizeof(int));
527527
if (err == GA_NO_ERROR && kerr != 0) {

src/gpuarray_blas_cuda_cublas.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,9 +1099,9 @@ static int sgemvBatch(cb_order order, cb_transpose transA,
10991099
args[8] = &N;
11001100

11011101
if (transA == cb_no_trans) {
1102-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgemvBH_N_a1_b1_small, 2, ls, gs, 0, args);
1102+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgemvBH_N_a1_b1_small, 2, gs, ls, 0, args);
11031103
} else {
1104-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgemvBH_T_a1_b1_small, 2, ls, gs, 0, args);
1104+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgemvBH_T_a1_b1_small, 2, gs, ls, 0, args);
11051105
}
11061106

11071107
cuda_ops.buffer_release(Aa);
@@ -1223,9 +1223,9 @@ static int dgemvBatch(cb_order order, cb_transpose transA,
12231223
args[8] = &N;
12241224

12251225
if (transA == cb_no_trans) {
1226-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->dgemvBH_N_a1_b1_small, 2, ls, gs, 0, args);
1226+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->dgemvBH_N_a1_b1_small, 2, gs, ls, 0, args);
12271227
} else {
1228-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->dgemvBH_T_a1_b1_small, 2, ls, gs, 0, args);
1228+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->dgemvBH_T_a1_b1_small, 2, gs, ls, 0, args);
12291229
}
12301230

12311231
cuda_ops.buffer_release(Aa);
@@ -1486,7 +1486,7 @@ static int sgerBatch(cb_order order, size_t M, size_t N, float alpha,
14861486
args[8] = &M;
14871487
args[9] = &N;
14881488

1489-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgerBH_gen_small, 3, ls, gs, 0, args);
1489+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgerBH_gen_small, 3, gs, ls, 0, args);
14901490

14911491
cuda_ops.buffer_release(Aa);
14921492
cuda_ops.buffer_release(xa);
@@ -1618,7 +1618,7 @@ static int dgerBatch(cb_order order, size_t M, size_t N, double alpha,
16181618
args[8] = &M;
16191619
args[9] = &N;
16201620

1621-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgerBH_gen_small, 3, ls, gs, 0, args);
1621+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgerBH_gen_small, 3, gs, ls, 0, args);
16221622

16231623
cuda_ops.buffer_release(Aa);
16241624
cuda_ops.buffer_release(xa);

src/gpuarray_buffer.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,9 @@ int gpukernel_setarg(gpukernel *k, unsigned int i, void *a) {
180180
return ((partial_gpukernel *)k)->ctx->ops->kernel_setarg(k, i, a);
181181
}
182182

183-
int gpukernel_call(gpukernel *k, unsigned int n, const size_t *ls,
184-
const size_t *gs, size_t shared, void **args) {
185-
return ((partial_gpukernel *)k)->ctx->ops->kernel_call(k, n, ls, gs,
183+
int gpukernel_call(gpukernel *k, unsigned int n, const size_t *gs,
184+
const size_t *ls, size_t shared, void **args) {
185+
return ((partial_gpukernel *)k)->ctx->ops->kernel_call(k, n, gs, ls,
186186
shared, args);
187187
}
188188

src/gpuarray_buffer_cuda.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,7 +1237,7 @@ static int cuda_kernelsetarg(gpukernel *k, unsigned int i, void *arg) {
12371237
}
12381238

12391239
static int cuda_callkernel(gpukernel *k, unsigned int n,
1240-
const size_t *bs, const size_t *gs,
1240+
const size_t *gs, const size_t *ls,
12411241
size_t shared, void **args) {
12421242
cuda_context *ctx = k->ctx;
12431243
unsigned int i;
@@ -1258,15 +1258,15 @@ static int cuda_callkernel(gpukernel *k, unsigned int n,
12581258

12591259
switch (n) {
12601260
case 1:
1261-
ctx->err = cuLaunchKernel(k->k, gs[0], 1, 1, bs[0], 1, 1, shared,
1261+
ctx->err = cuLaunchKernel(k->k, gs[0], 1, 1, ls[0], 1, 1, shared,
12621262
ctx->s, args, NULL);
12631263
break;
12641264
case 2:
1265-
ctx->err = cuLaunchKernel(k->k, gs[0], gs[1], 1, bs[0], bs[1], 1, shared,
1265+
ctx->err = cuLaunchKernel(k->k, gs[0], gs[1], 1, ls[0], ls[1], 1, shared,
12661266
ctx->s, args, NULL);
12671267
break;
12681268
case 3:
1269-
ctx->err = cuLaunchKernel(k->k, gs[0], gs[1], gs[2], bs[0], bs[1], bs[2],
1269+
ctx->err = cuLaunchKernel(k->k, gs[0], gs[1], gs[2], ls[0], ls[1], ls[2],
12701270
shared, ctx->s, args, NULL);
12711271
break;
12721272
default:

src/gpuarray_buffer_opencl.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ cl_mem cl_get_buf(gpudata *g) { ASSERT_BUF(g); return g->buf; }
274274

275275
static void cl_releasekernel(gpukernel *k);
276276
static int cl_callkernel(gpukernel *k, unsigned int n,
277-
const size_t *bs, const size_t *gs,
277+
const size_t *gs, const size_t *ls,
278278
size_t shared, void **args);
279279

280280
static const char CL_PREAMBLE[] =
@@ -748,7 +748,7 @@ static int cl_memset(gpudata *dst, size_t offset, int data) {
748748
if (res != GA_NO_ERROR) goto fail;
749749
gs = ((n-1) / ls) + 1;
750750
args[0] = dst;
751-
res = cl_callkernel(m, 1, &ls, &gs, 0, args);
751+
res = cl_callkernel(m, 1, &gs, &ls, 0, args);
752752

753753
fail:
754754
cl_releasekernel(m);
@@ -998,7 +998,7 @@ static int cl_setkernelarg(gpukernel *k, unsigned int i, void *a) {
998998
}
999999

10001000
static int cl_callkernel(gpukernel *k, unsigned int n,
1001-
const size_t *ls, const size_t *gs,
1001+
const size_t *gs, const size_t *ls,
10021002
size_t shared, void **args) {
10031003
cl_ctx *ctx = k->ctx;
10041004
size_t _gs[3];

src/gpuarray_elemwise.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -414,10 +414,10 @@ static int call_basic(GpuElemwise *ge, void **args, size_t n, unsigned int nd,
414414
}
415415
}
416416

417-
err = GpuKernel_sched(k, n, &ls, &gs);
417+
err = GpuKernel_sched(k, n, &gs, &ls);
418418
if (err != GA_NO_ERROR) goto error;
419419

420-
err = GpuKernel_call(k, 1, &ls, &gs, 0, NULL);
420+
err = GpuKernel_call(k, 1, &gs, &ls, 0, NULL);
421421
error:
422422
return err;
423423
}
@@ -572,9 +572,9 @@ static int call_contig(GpuElemwise *ge, void **args, size_t n) {
572572
if (err != GA_NO_ERROR) return err;
573573
}
574574
}
575-
err = GpuKernel_sched(&ge->k_contig, n, &ls, &gs);
575+
err = GpuKernel_sched(&ge->k_contig, n, &gs, &ls);
576576
if (err != GA_NO_ERROR) return err;
577-
return GpuKernel_call(&ge->k_contig, 1, &ls, &gs, 0, NULL);
577+
return GpuKernel_call(&ge->k_contig, 1, &gs, &ls, 0, NULL);
578578
}
579579

580580
GpuElemwise *GpuElemwise_new(gpucontext *ctx,

0 commit comments

Comments
 (0)