|
59 | 59 |
|
60 | 60 | // The mean could be calculated by simply dividing the sum by
|
61 | 61 | // the number of elements, but that method is numerically unstable
|
62 |
| -#define RUN_MEAN1(type, array, results, r, ss)\ |
| 62 | +#define RUN_MEAN1(type, array, rarray, ss)\ |
63 | 63 | ({\
|
64 |
| - mp_float_t M, m;\ |
65 |
| - M = m = (mp_float_t)(*(type *)(array));\ |
66 |
| - for(size_t i=1; i < (ss).shape[0]; i++) {\ |
67 |
| - (array) += (ss).strides[0];\ |
| 64 | + mp_float_t M = 0.0;\ |
| 65 | + for(size_t i=0; i < (ss).shape[0]; i++) {\ |
68 | 66 | mp_float_t value = (mp_float_t)(*(type *)(array));\
|
69 |
| - m = M + (value - M) / (mp_float_t)(i+1);\ |
70 |
| - M = m;\ |
| 67 | + M = M + (value - M) / (mp_float_t)(i+1);\ |
| 68 | + (array) += (ss).strides[0];\ |
71 | 69 | }\
|
72 |
| - (array) += (ss).strides[0];\ |
73 |
| - *(r)++ = M;\ |
| 70 | + *(rarray)++ = M;\ |
74 | 71 | })
|
75 | 72 |
|
76 | 73 | // Instead of the straightforward implementation of the definition,
|
77 | 74 | // we take the numerically stable Welford algorithm here
|
78 | 75 | // https://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
|
79 |
| -#define RUN_STD1(type, array, results, r, ss, div)\ |
| 76 | +#define RUN_STD1(type, array, rarray, ss, div)\ |
80 | 77 | ({\
|
81 |
| - mp_float_t M = 0.0, m = 0.0, S = 0.0, s = 0.0;\ |
| 78 | + mp_float_t M = 0.0, m = 0.0, S = 0.0;\ |
82 | 79 | for(size_t i=0; i < (ss).shape[0]; i++) {\
|
83 | 80 | mp_float_t value = (mp_float_t)(*(type *)(array));\
|
84 | 81 | m = M + (value - M) / (mp_float_t)(i+1);\
|
85 |
| - s = S + (value - M) * (value - m);\ |
| 82 | + S = S + (value - M) * (value - m);\ |
| 83 | + M = m;\ |
| 84 | + (array) += (ss).strides[0];\ |
| 85 | + }\ |
| 86 | + *(rarray)++ = MICROPY_FLOAT_C_FUN(sqrt)(S / (div));\ |
| 87 | +}) |
| 88 | + |
| 89 | +#define RUN_MEAN_STD1(type, array, rarray, ss, div, isStd)\ |
| 90 | +({\ |
| 91 | + mp_float_t M = 0.0, m = 0.0, S = 0.0;\ |
| 92 | + for(size_t i=0; i < (ss).shape[0]; i++) {\ |
| 93 | + mp_float_t value = (mp_float_t)(*(type *)(array));\ |
| 94 | + m = M + (value - M) / (mp_float_t)(i+1);\ |
| 95 | + if(isStd) {\ |
| 96 | + S += (value - M) * (value - m);\ |
| 97 | + }\ |
86 | 98 | M = m;\
|
87 |
| - S = s;\ |
88 | 99 | (array) += (ss).strides[0];\
|
89 | 100 | }\
|
90 |
| - *(r)++ = MICROPY_FLOAT_C_FUN(sqrt)(s / (div));\ |
| 101 | + *(rarray)++ = isStd ? MICROPY_FLOAT_C_FUN(sqrt)(S / (div)) : M;\ |
91 | 102 | })
|
92 | 103 |
|
93 | 104 | #define RUN_DIFF1(ndarray, type, array, results, rarray, index, stencil, N)\
|
|
181 | 192 | RUN_SUM1(type, (array), (results), (rarray), (ss));\
|
182 | 193 | } while(0)
|
183 | 194 |
|
184 |
| -#define RUN_MEAN(type, array, results, r, ss) do {\ |
185 |
| - RUN_MEAN1(type, (array), (results), (r), (ss));\ |
| 195 | +#define RUN_MEAN(type, array, rarray, ss) do {\ |
| 196 | + RUN_MEAN1(type, (array), (rarray), (ss));\ |
186 | 197 | } while(0)
|
187 | 198 |
|
188 |
| -#define RUN_STD(type, array, results, r, ss, div) do {\ |
189 |
| - RUN_STD1(type, (array), (results), (r), (ss), (div));\ |
| 199 | +#define RUN_STD(type, array, rarray, ss, div) do {\ |
| 200 | + RUN_STD1(type, (array), (results), (rarray), (ss), (div));\ |
| 201 | +} while(0) |
| 202 | + |
| 203 | +#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\ |
| 204 | + RUN_MEAN_STD1(type, (array), (results), (rarray), (ss), (div), (isStd));\ |
190 | 205 | } while(0)
|
191 | 206 |
|
192 | 207 | #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
|
|
218 | 233 | } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
|
219 | 234 | } while(0)
|
220 | 235 |
|
221 |
| -#define RUN_MEAN(type, array, results, r, ss) do {\ |
| 236 | +#define RUN_MEAN(type, array, rarray, ss) do {\ |
222 | 237 | size_t l = 0;\
|
223 | 238 | do {\
|
224 |
| - RUN_MEAN1(type, (array), (results), (r), (ss));\ |
| 239 | + RUN_MEAN1(type, (array), (rarray), (ss));\ |
225 | 240 | (array) -= (ss).strides[0] * (ss).shape[0];\
|
226 | 241 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
|
227 | 242 | l++;\
|
228 | 243 | } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
|
229 | 244 | } while(0)
|
230 | 245 |
|
231 |
| -#define RUN_STD(type, array, results, r, ss, div) do {\ |
| 246 | +#define RUN_STD(type, array, rarray, ss, div) do {\ |
232 | 247 | size_t l = 0;\
|
233 | 248 | do {\
|
234 |
| - RUN_STD1(type, (array), (results), (r), (ss), (div));\ |
| 249 | + RUN_STD1(type, (array), (rarray), (ss), (div));\ |
235 | 250 | (array) -= (ss).strides[0] * (ss).shape[0];\
|
236 | 251 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
|
237 | 252 | l++;\
|
238 | 253 | } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
|
239 | 254 | } while(0)
|
240 | 255 |
|
| 256 | +#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\ |
| 257 | + size_t l = 0;\ |
| 258 | + do {\ |
| 259 | + RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\ |
| 260 | + (array) -= (ss).strides[0] * (ss).shape[0];\ |
| 261 | + (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
| 262 | + l++;\ |
| 263 | + } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
| 264 | +} while(0) |
| 265 | + |
| 266 | + |
241 | 267 | #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
|
242 | 268 | size_t l = 0;\
|
243 | 269 | do {\
|
|
298 | 324 | } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
|
299 | 325 | } while(0)
|
300 | 326 |
|
301 |
| -#define RUN_MEAN(type, array, results, r, ss) do {\ |
| 327 | +#define RUN_MEAN(type, array, rarray, ss) do {\ |
| 328 | + size_t k = 0;\ |
| 329 | + do {\ |
| 330 | + size_t l = 0;\ |
| 331 | + do {\ |
| 332 | + RUN_MEAN1(type, (array), (rarray), (ss));\ |
| 333 | + (array) -= (ss).strides[0] * (ss).shape[0];\ |
| 334 | + (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
| 335 | + l++;\ |
| 336 | + } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
| 337 | + (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS-1];\ |
| 338 | + (array) += (ss).strides[ULAB_MAX_DIMS - 2];\ |
| 339 | + k++;\ |
| 340 | + } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\ |
| 341 | +} while(0) |
| 342 | + |
| 343 | +#define RUN_STD(type, array, rarray, ss, div) do {\ |
302 | 344 | size_t k = 0;\
|
303 | 345 | do {\
|
304 | 346 | size_t l = 0;\
|
305 | 347 | do {\
|
306 |
| - RUN_MEAN1(type, (array), (results), (r), (ss));\ |
| 348 | + RUN_STD1(type, (array), (rarray), (ss), (div));\ |
307 | 349 | (array) -= (ss).strides[0] * (ss).shape[0];\
|
308 | 350 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
|
309 | 351 | l++;\
|
|
314 | 356 | } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
|
315 | 357 | } while(0)
|
316 | 358 |
|
317 |
| -#define RUN_STD(type, array, results, r, ss, div) do {\ |
| 359 | +#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\ |
318 | 360 | size_t k = 0;\
|
319 | 361 | do {\
|
320 | 362 | size_t l = 0;\
|
321 | 363 | do {\
|
322 |
| - RUN_STD1(type, (array), (results), (r), (ss), (div));\ |
| 364 | + RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\ |
323 | 365 | (array) -= (ss).strides[0] * (ss).shape[0];\
|
324 | 366 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
|
325 | 367 | l++;\
|
|
424 | 466 | } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
|
425 | 467 | } while(0)
|
426 | 468 |
|
427 |
| -#define RUN_MEAN(type, array, results, r, ss) do {\ |
| 469 | +#define RUN_MEAN(type, array, rarray, ss) do {\ |
| 470 | + size_t j = 0;\ |
| 471 | + do {\ |
| 472 | + size_t k = 0;\ |
| 473 | + do {\ |
| 474 | + size_t l = 0;\ |
| 475 | + do {\ |
| 476 | + RUN_MEAN1(type, (array), (rarray), (ss));\ |
| 477 | + (array) -= (ss).strides[0] * (ss).shape[0];\ |
| 478 | + (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
| 479 | + l++;\ |
| 480 | + } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
| 481 | + (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS-1];\ |
| 482 | + (array) += (ss).strides[ULAB_MAX_DIMS - 2];\ |
| 483 | + k++;\ |
| 484 | + } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\ |
| 485 | + (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS-2];\ |
| 486 | + (array) += (ss).strides[ULAB_MAX_DIMS - 3];\ |
| 487 | + j++;\ |
| 488 | + } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\ |
| 489 | +} while(0) |
| 490 | + |
| 491 | +#define RUN_STD(type, array, rarray, ss, div) do {\ |
428 | 492 | size_t j = 0;\
|
429 | 493 | do {\
|
430 | 494 | size_t k = 0;\
|
431 | 495 | do {\
|
432 | 496 | size_t l = 0;\
|
433 | 497 | do {\
|
434 |
| - RUN_MEAN1(type, (array), (results), (r), (ss));\ |
| 498 | + RUN_STD1(type, (array), (rarray), (ss), (div));\ |
435 | 499 | (array) -= (ss).strides[0] * (ss).shape[0];\
|
436 | 500 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
|
437 | 501 | l++;\
|
|
446 | 510 | } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
|
447 | 511 | } while(0)
|
448 | 512 |
|
449 |
| -#define RUN_STD(type, array, results, r, ss, div) do {\ |
| 513 | +#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\ |
450 | 514 | size_t j = 0;\
|
451 | 515 | do {\
|
452 | 516 | size_t k = 0;\
|
453 | 517 | do {\
|
454 | 518 | size_t l = 0;\
|
455 | 519 | do {\
|
456 |
| - RUN_STD1(type, (array), (results), (r), (ss), (div));\ |
| 520 | + RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\ |
457 | 521 | (array) -= (ss).strides[0] * (ss).shape[0];\
|
458 | 522 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
|
459 | 523 | l++;\
|
|
0 commit comments