@@ -46,6 +46,8 @@ typedef enum _ga_reduce_op {
4646 GA_REDUCE_XOR , /* ^ */
4747 GA_REDUCE_ALL , /* &&/all() */
4848 GA_REDUCE_ANY , /* ||/any() */
49+
50+ GA_REDUCE_ENDSUPPORTED /* Must be last element in enum */
4951} ga_reduce_op ;
5052
5153
@@ -57,29 +59,31 @@ typedef enum _ga_reduce_op {
5759 * @param [out] grOut The reduction operator.
5860 * @param [in] gpuCtx The GPU context.
5961 * @param [in] op The reduction operation to perform.
60- * @param [in] ndf The minimum number of destination dimensions to support.
61- * @param [in] ndr The minimum number of reduction dimensions to support.
62- * @param [in] srcTypeCode The data type of the source operand.
62+ * @param [in] ndf The minimum number of free (destination) dimensions to support.
63+ * @param [in] ndr The minimum number of reduction (source) dimensions to support.
64+ * @param [in] s0TypeCode The data type of the source operand.
6365 * @param [in] flags Reduction operator creation flags. Currently must be
6466 * set to 0.
6567 *
66- * @return GA_NO_ERROR if the operator was created successfully, or a non-zero
67- * error code otherwise.
68+ * @return GA_NO_ERROR if the operator was created successfully,
69+ * GA_INVALID_ERROR if grOut is NULL or some other argument was invalid,
70+ * GA_NO_MEMORY if memory allocation failed at any time during creation,
71+ * or another non-zero error code otherwise.
6872 */
6973
70- GPUARRAY_PUBLIC int GpuReduction_new (GpuReduction * * grOut ,
71- gpucontext * gpuCtx ,
72- ga_reduce_op op ,
73- unsigned ndf ,
74- unsigned ndr ,
75- int srcTypeCode ,
76- int flags );
74+ GPUARRAY_PUBLIC int GpuReduction_new (GpuReduction * * grOut ,
75+ gpucontext * gpuCtx ,
76+ ga_reduce_op op ,
77+ unsigned ndf ,
78+ unsigned ndr ,
79+ int s0TypeCode ,
80+ int flags );
7781
7882/**
7983 * @brief Deallocate an operator allocated by GpuReduction_new().
8084 */
8185
82- GPUARRAY_PUBLIC void GpuReduction_free (GpuReduction * gr );
86+ GPUARRAY_PUBLIC void GpuReduction_free (GpuReduction * gr );
8387
8488/**
8589 * @brief Invoke an operator allocated by GpuReduction_new() on a source tensor.
@@ -91,42 +95,41 @@ GPUARRAY_PUBLIC void GpuReduction_free (GpuReduction* gr);
9195 * destination.
9296 *
9397 * @param [in] gr The reduction operator.
94- * @param [out] dst The destination tensor. Has the same type as the source .
95- * @param [out] dstArg For argument of minima/maxima operations. Has type int64 .
96- * @param [in] src The source tensor.
98+ * @param [out] d0 The destination tensor.
99+ * @param [out] d1 The second destination tensor, for argmin/argmax operations.
100+ * @param [in] s0 The source tensor.
97101 * @param [in] reduxLen The number of axes reduced. Must be >= 1 and
98- * <= src ->nd.
102+ * <= s0->nd.
99103 * @param [in] reduxList A list of integers of length reduxLen, indicating
100104 * the axes to be reduced. The order of the axes
101- * matters for dstArg index calculations (GpuArray_argmin,
102- * GpuArray_argmax, GpuArray_minandargmin,
103- * GpuArray_maxandargmax). All entries in the list must be
105+ * matters for d1 index calculations (argmin, argmax,
106+ * minandargmin, maxandargmax). All entries in the list must be
104107 * unique, >= 0 and < s0->nd.
105108 *
106- * For example, if a 5D-tensor is max -reduced with an axis
107- * list of [3,4,1], then reduxLen shall be 3, and the
109+ * For example, if a 5D-tensor is maxandargmax-reduced with an
110+ * axis list of [3,4,1], then reduxLen shall be 3, and the
108111 * index calculation in every point shall take the form
109112 *
110- * dstArgmax [i0,i2] = i3 * src .shape[4] * src .shape[1] +
111- * i4 * src .shape[1] +
112- * i1
113+ * d1[i0,i2] = i3 * s0.shape[4] * s0.shape[1] +
114+ * i4 * s0.shape[1] +
115+ * i1
113116 *
114117 * where (i3,i4,i1) are the coordinates of the maximum-
115- * valued element within subtensor [i0,:,i2,:,:] of src .
118+ * valued element within subtensor [i0,:,i2,:,:] of s0.
116119 * @param [in] flags Reduction operator invocation flags. Currently must be
117120 * set to 0.
118121 *
119122 * @return GA_NO_ERROR if the operator was invoked successfully, or a non-zero
120123 * error code otherwise.
121124 */
122125
123- GPUARRAY_PUBLIC int GpuReduction_call (GpuReduction * gr ,
124- GpuArray * dst ,
125- GpuArray * dstArg ,
126- const GpuArray * src ,
127- unsigned reduxLen ,
128- const int * reduxList ,
129- int flags );
126+ GPUARRAY_PUBLIC int GpuReduction_call (const GpuReduction * gr ,
127+ GpuArray * d0 ,
128+ GpuArray * d1 ,
129+ const GpuArray * s0 ,
130+ unsigned reduxLen ,
131+ const int * reduxList ,
132+ int flags );
130133
131134
132135#ifdef __cplusplus
0 commit comments