Skip to content

Commit fa5222c

Browse files
committed
Massive Refactor into effectively a lattice engine.
1 parent 0eb633c commit fa5222c

File tree

3 files changed

+2407
-1984
lines changed

3 files changed

+2407
-1984
lines changed

src/gpuarray/reduction.h

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ typedef enum _ga_reduce_op {
4646
GA_REDUCE_XOR, /* ^ */
4747
GA_REDUCE_ALL, /* &&/all() */
4848
GA_REDUCE_ANY, /* ||/any() */
49+
50+
GA_REDUCE_ENDSUPPORTED /* Must be last element in enum */
4951
} ga_reduce_op;
5052

5153

@@ -57,29 +59,31 @@ typedef enum _ga_reduce_op {
5759
* @param [out] grOut The reduction operator.
5860
* @param [in] gpuCtx The GPU context.
5961
* @param [in] op The reduction operation to perform.
60-
* @param [in] ndf The minimum number of destination dimensions to support.
61-
* @param [in] ndr The minimum number of reduction dimensions to support.
62-
* @param [in] srcTypeCode The data type of the source operand.
62+
* @param [in] ndf The minimum number of free (destination) dimensions to support.
63+
* @param [in] ndr The minimum number of reduction (source) dimensions to support.
64+
* @param [in] s0TypeCode The data type of the source operand.
6365
* @param [in] flags Reduction operator creation flags. Currently must be
6466
* set to 0.
6567
*
66-
* @return GA_NO_ERROR if the operator was created successfully, or a non-zero
67-
* error code otherwise.
68+
* @return GA_NO_ERROR if the operator was created successfully
69+
* GA_INVALID_ERROR if grOut is NULL, or some other argument was invalid
70+
* GA_MEMORY_ERROR if memory allocation failed at any time during creation
71+
* or other non-zero error codes otherwise.
6872
*/
6973

70-
GPUARRAY_PUBLIC int GpuReduction_new (GpuReduction** grOut,
71-
gpucontext* gpuCtx,
72-
ga_reduce_op op,
73-
unsigned ndf,
74-
unsigned ndr,
75-
int srcTypeCode,
76-
int flags);
74+
GPUARRAY_PUBLIC int GpuReduction_new (GpuReduction** grOut,
75+
gpucontext* gpuCtx,
76+
ga_reduce_op op,
77+
unsigned ndf,
78+
unsigned ndr,
79+
int s0TypeCode,
80+
int flags);
7781

7882
/**
7983
* @brief Deallocate an operator allocated by GpuReduction_new().
8084
*/
8185

82-
GPUARRAY_PUBLIC void GpuReduction_free (GpuReduction* gr);
86+
GPUARRAY_PUBLIC void GpuReduction_free (GpuReduction* gr);
8387

8488
/**
8589
* @brief Invoke an operator allocated by GpuReduction_new() on a source tensor.
@@ -91,42 +95,41 @@ GPUARRAY_PUBLIC void GpuReduction_free (GpuReduction* gr);
9195
* destination.
9296
*
9397
* @param [in] gr The reduction operator.
94-
* @param [out] dst The destination tensor. Has the same type as the source.
95-
* @param [out] dstArg For argument of minima/maxima operations. Has type int64.
96-
* @param [in] src The source tensor.
98+
* @param [out] d0 The destination tensor.
99+
* @param [out] d1 The second destination tensor, for argmin/argmax operations.
100+
* @param [in] s0 The source tensor.
97101
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
98-
* <= src->nd.
102+
* <= s0->nd.
99103
* @param [in] reduxList A list of integers of length reduxLen, indicating
100104
* the axes to be reduced. The order of the axes
101-
* matters for dstArg index calculations (GpuArray_argmin,
102-
* GpuArray_argmax, GpuArray_minandargmin,
103-
* GpuArray_maxandargmax). All entries in the list must be
105+
* matters for d1 index calculations (argmin, argmax,
106+
* minandargmin, maxandargmax). All entries in the list must be
104107
* unique, >= 0 and < s0->nd.
105108
*
106-
* For example, if a 5D-tensor is max-reduced with an axis
107-
* list of [3,4,1], then reduxLen shall be 3, and the
109+
* For example, if a 5D-tensor is maxandargmax-reduced with an
110+
* axis list of [3,4,1], then reduxLen shall be 3, and the
108111
* index calculation in every point shall take the form
109112
*
110-
* dstArgmax[i0,i2] = i3 * src.shape[4] * src.shape[1] +
111-
* i4 * src.shape[1] +
112-
* i1
113+
* d1[i0,i2] = i3 * s0.shape[4] * s0.shape[1] +
114+
* i4 * s0.shape[1] +
115+
* i1
113116
*
114117
* where (i3,i4,i1) are the coordinates of the maximum-
115-
* valued element within subtensor [i0,:,i2,:,:] of src.
118+
* valued element within subtensor [i0,:,i2,:,:] of s0.
116119
* @param [in] flags Reduction operator invocation flags. Currently must be
117120
* set to 0.
118121
*
119122
* @return GA_NO_ERROR if the operator was invoked successfully, or a non-zero
120123
* error code otherwise.
121124
*/
122125

123-
GPUARRAY_PUBLIC int GpuReduction_call (GpuReduction* gr,
124-
GpuArray* dst,
125-
GpuArray* dstArg,
126-
const GpuArray* src,
127-
unsigned reduxLen,
128-
const int* reduxList,
129-
int flags);
126+
GPUARRAY_PUBLIC int GpuReduction_call (const GpuReduction* gr,
127+
GpuArray* d0,
128+
GpuArray* d1,
129+
const GpuArray* s0,
130+
unsigned reduxLen,
131+
const int* reduxList,
132+
int flags);
130133

131134

132135
#ifdef __cplusplus

0 commit comments

Comments
 (0)