|
| 1 | +/** |
| 2 | +* @license Apache-2.0 |
| 3 | +* |
| 4 | +* Copyright (c) 2025 The Stdlib Authors. |
| 5 | +* |
| 6 | +* Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | +* you may not use this file except in compliance with the License. |
| 8 | +* You may obtain a copy of the License at |
| 9 | +* |
| 10 | +* http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +* |
| 12 | +* Unless required by applicable law or agreed to in writing, software |
| 13 | +* distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +* See the License for the specific language governing permissions and |
| 16 | +* limitations under the License. |
| 17 | +*/ |
| 18 | + |
| 19 | +#ifndef STDLIB_NDARRAY_BASE_UNARY_ACCUMULATE_MACROS_3D_H |
| 20 | +#define STDLIB_NDARRAY_BASE_UNARY_ACCUMULATE_MACROS_3D_H |
| 21 | + |
| 22 | +#include "stdlib/ndarray/ctor.h" |
| 23 | +#include "stdlib/ndarray/orders.h" |
| 24 | +#include <stdint.h> |
| 25 | + |
/**
* Macro containing the preamble for nested loops which accumulate over the elements of a three-dimensional ndarray.
*
* ## Notes
*
* - Expects an `arrays` variable to be in scope at the expansion site. `arrays[ 0 ]` is the ndarray which is iterated over, `arrays[ 1 ]` is the ndarray whose first indexed element is loaded as the initial accumulator value, and `arrays[ 2 ]` is the ndarray whose first indexed element is addressed by `px3`.
* - Declares an accumulator `acc` of type `tout` and initializes it from `px2` before entering the loops.
* - Opens three nested `for` loops but only two loop blocks; the innermost loop body must be supplied by the caller, after which the two open blocks must be closed (e.g., via the corresponding epilogue macro).
* - Only `px1` is advanced by the loops. The increments `d1x1` and `d2x1` subtract the distance already covered by the next-inner loop so that a single running pointer can be used.
* - Variable naming conventions:
*
*   - `sx#`, `px#`, and `d@x#` where `#` corresponds to the ndarray argument number, starting at `1`.
*   - `S@`, `i@`, and `d@x#` where `@` corresponds to the loop number, with `0` being the innermost loop.
*
* @param tout  output type
*
* @example
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_PREAMBLE( double ) {
*     // Innermost loop body...
* }
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_EPILOGUE( double )
*/
#define STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_PREAMBLE( tout ) \
	struct ndarray *x1 = arrays[ 0 ]; \
	struct ndarray *x2 = arrays[ 1 ]; \
	struct ndarray *x3 = arrays[ 2 ]; \
	int64_t *shape = stdlib_ndarray_shape( x1 ); \
	int64_t *sx1 = stdlib_ndarray_strides( x1 ); \
	uint8_t *px1 = stdlib_ndarray_data( x1 ); \
	uint8_t *px2 = stdlib_ndarray_data( x2 ); \
	uint8_t *px3 = stdlib_ndarray_data( x3 ); \
	int64_t d0x1; \
	int64_t d1x1; \
	int64_t d2x1; \
	int64_t S0; \
	int64_t S1; \
	int64_t S2; \
	int64_t i0; \
	int64_t i1; \
	int64_t i2; \
	tout acc; \
	/* Extract loop variables for purposes of loop interchange: dimensions and loop offset (pointer) increments... */ \
	if ( stdlib_ndarray_order( x1 ) == STDLIB_NDARRAY_ROW_MAJOR ) { \
		/* For row-major ndarrays, the last dimensions have the fastest changing indices... */ \
		S0 = shape[ 2 ]; \
		S1 = shape[ 1 ]; \
		S2 = shape[ 0 ]; \
		d0x1 = sx1[ 2 ]; \
		d1x1 = sx1[ 1 ] - ( S0*sx1[2] ); \
		d2x1 = sx1[ 0 ] - ( S1*sx1[1] ); \
	} else { \
		/* For column-major ndarrays, the first dimensions have the fastest changing indices... */ \
		S0 = shape[ 0 ]; \
		S1 = shape[ 1 ]; \
		S2 = shape[ 2 ]; \
		d0x1 = sx1[ 0 ]; \
		d1x1 = sx1[ 1 ] - ( S0*sx1[0] ); \
		d2x1 = sx1[ 2 ] - ( S1*sx1[1] ); \
	} \
	/* Set the pointers to the first indexed elements... */ \
	px1 += stdlib_ndarray_offset( x1 ); \
	px2 += stdlib_ndarray_offset( x2 ); \
	px3 += stdlib_ndarray_offset( x3 ); \
	/* Initialize the accumulator by loading the initial value (dereference; do not assign the pointer itself): */ \
	acc = *(tout *)px2; \
	/* Iterate over the ndarray dimensions... */ \
	for ( i2 = 0; i2 < S2; i2++, px1 += d2x1 ) { \
		for ( i1 = 0; i1 < S1; i1++, px1 += d1x1 ) { \
			for ( i0 = 0; i0 < S0; i0++, px1 += d0x1 )
| 91 | + |
/**
* Macro containing the epilogue for nested loops which operate on elements of a three-dimensional ndarray.
*
* ## Notes
*
* - Closes two enclosing loop blocks and then stores the accumulator `acc` at the location addressed by `px3`, interpreted as a value of type `tout`.
* - Expects `acc` and `px3` to be in scope at the expansion site.
*
* @param tout  output type
*
* @example
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_PREAMBLE( double ) {
*     // Innermost loop body...
* }
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_EPILOGUE( double )
*/
#define STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_EPILOGUE( tout ) \
		} \
	} \
	*( (tout *)px3 ) = acc;
| 107 | + |
/**
* Macro for a three-dimensional ndarray accumulation loop which inlines an expression.
*
* ## Notes
*
* - On each iteration, reads the current input ndarray element through the pointer `px1` as type `tin` and binds it to a constant named `in1`.
* - The provided expression is evaluated as a statement on each iteration and is expected to read `in1` and update `acc`.
*
* @param tin   input type
* @param tout  output type
* @param expr  expression to inline
*
* @example
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_INLINE( double, double, acc += in1 )
*/
#define STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_INLINE( tin, tout, expr ) \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_PREAMBLE( tout ) \
	{ \
		const tin in1 = *( (tin *)px1 ); \
		expr; \
	} \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_EPILOGUE( tout )
| 129 | + |
/**
* Macro for a three-dimensional ndarray accumulation loop which invokes a callback.
*
* ## Notes
*
* - Expects a callback `f` to be in scope at the expansion site.
* - On each iteration, reads the current input ndarray element through the pointer `px1` as type `tin`.
* - Invokes `f` with the current accumulator as the first argument and the element as the second argument.
* - Explicitly casts each `f` invocation result to `tout` before assigning the result to the accumulator.
*
* @param tin   input type
* @param tout  output type
*
* @example
* // e.g., dd_d
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_CLBK( double, double )
*/
#define STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_CLBK( tin, tout ) \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_PREAMBLE( tout ) \
	{ \
		const tin in1 = *( (tin *)px1 ); \
		acc = (tout)f( acc, in1 ); \
	} \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_EPILOGUE( tout )
| 151 | + |
/**
* Macro for a three-dimensional ndarray accumulation loop which invokes a callback and does not cast the callback's return value (e.g., when the return value is a `struct`).
*
* ## Notes
*
* - Expects a callback `f` to be in scope at the expansion site.
* - On each iteration, reads the current input ndarray element through the pointer `px1` as type `tin`.
* - Invokes `f` with the current accumulator as the first argument and the element as the second argument, and assigns the result to the accumulator as-is.
*
* @param tin   input type
* @param tout  output type
*
* @example
* #include "stdlib/complex/float64/ctor.h"
*
* // e.g., zz_z
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_CLBK_RET_NOCAST( stdlib_complex128_t, stdlib_complex128_t )
*/
#define STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_CLBK_RET_NOCAST( tin, tout ) \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_PREAMBLE( tout ) \
	{ \
		const tin in1 = *( (tin *)px1 ); \
		acc = f( acc, in1 ); \
	} \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_EPILOGUE( tout )
| 174 | + |
/**
* Macro for a three-dimensional ndarray accumulation loop which invokes a callback requiring arguments be explicitly cast to a different type.
*
* ## Notes
*
* - Expects a callback `f` to be in scope at the expansion site.
* - On each iteration, reads the current input ndarray element through the pointer `px1` as type `tin`.
* - Explicitly casts the accumulator argument to `fin1` and the element argument to `fin2` before invoking `f`.
* - Explicitly casts each `f` invocation result to `tout` before assigning the result to the accumulator.
*
* @param tin   input type
* @param tout  output type
* @param fin1  callback accumulator argument type
* @param fin2  callback element argument type
*
* @example
* // e.g., ff_f_as_dd_d
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_CLBK_ARG_CAST( float, float, double, double )
*/
#define STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_CLBK_ARG_CAST( tin, tout, fin1, fin2 ) \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_PREAMBLE( tout ) \
	{ \
		const tin in1 = *( (tin *)px1 ); \
		acc = (tout)f( (fin1)acc, (fin2)in1 ); \
	} \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_EPILOGUE( tout )
| 200 | + |
/**
* Macro for a three-dimensional ndarray accumulation loop which invokes a callback requiring arguments be cast to a different type via casting functions.
*
* ## Notes
*
* - Expects a callback `f` to be in scope at the expansion site.
* - On each iteration, reads the current input ndarray element through the pointer `px1` as type `tin`.
* - Converts the accumulator argument via `cin1` and the element argument via `cin2` before invoking `f`.
* - Converts each `f` invocation result via `cout` before assigning the result to the accumulator.
*
* @param tin   input type
* @param tout  output type
* @param cin1  input casting function for the accumulator argument
* @param cin2  input casting function for the element argument
* @param cout  output casting function
*
* @example
* #include "stdlib/complex/float32/ctor.h"
* #include "stdlib/complex/float64/ctor.h"
*
* // e.g., fc_c_as_zz_z
* STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_CLBK_ARG_CAST_FCN( float, stdlib_complex64_t, stdlib_complex128_from_complex64, stdlib_complex128_from_float32, stdlib_complex128_to_complex64 )
*/
#define STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_CLBK_ARG_CAST_FCN( tin, tout, cin1, cin2, cout ) \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_PREAMBLE( tout ) \
	{ \
		const tin in1 = *( (tin *)px1 ); \
		acc = cout( f( cin1( acc ), cin2( in1 ) ) ); \
	} \
	STDLIB_NDARRAY_UNARY_ACCUMULATE_3D_LOOP_EPILOGUE( tout )
| 230 | + |
| 231 | +#endif // !STDLIB_NDARRAY_BASE_UNARY_ACCUMULATE_MACROS_3D_H |
0 commit comments