combined macros for std and mean

v923z · v923z · commit 0b20b30af650 · 2021-02-11T08:03:41.000+01:00
diff --git a/code/numpy/numerical/numerical.c b/code/numpy/numerical/numerical.c
@@ -259,6 +259,11 @@ static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t
         }
     } else {
         shape_strides _shape_strides = tools_reduce_axes(ndarray, axis);
+        // if(ndarray->ndim == 1) {
+        //     // if we have the single dimension, axis = 0 is equivalent to axis = None
+        //     // the call to tools_reduce_axes() has made sure that axis = 0
+        //     return numerical_sum_mean_std_ndarray(ndarray, mp_const_none, optype, ddof);
+        // }
         ndarray_obj_t *results = NULL;
         uint8_t *rarray = NULL;
 
@@ -278,46 +283,33 @@ static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t
                 // for floats, the sum might be inaccurate with the naive summation
                 // call mean, and multiply with the number of samples
                 mp_float_t *r = (mp_float_t *)results->array;
-                RUN_MEAN(mp_float_t, array, results, r, _shape_strides);
+                RUN_MEAN_STD(mp_float_t, array, r, _shape_strides, 0.0, 0);
                 mp_float_t norm = (mp_float_t)_shape_strides.shape[0];
                 // re-wind the array here
                 r = (mp_float_t *)results->array;
                 for(size_t i=0; i < results->len; i++) {
                     *r++ *= norm;
                 }
             }
-        } else if(optype == NUMERICAL_MEAN) {
-            results = ndarray_new_dense_ndarray(MAX(1, ndarray->ndim-1), _shape_strides.shape, NDARRAY_FLOAT);
-            mp_float_t *r = (mp_float_t *)results->array;
-            if(ndarray->dtype == NDARRAY_UINT8) {
-                RUN_MEAN(uint8_t, array, results, r, _shape_strides);
-            } else if(ndarray->dtype == NDARRAY_INT8) {
-                RUN_MEAN(int8_t, array, results, r, _shape_strides);
-            } else if(ndarray->dtype == NDARRAY_UINT16) {
-                RUN_MEAN(uint16_t, array, results, r, _shape_strides);
-            } else if(ndarray->dtype == NDARRAY_INT16) {
-                RUN_MEAN(int16_t, array, results, r, _shape_strides);
-            } else {
-                RUN_MEAN(mp_float_t, array, results, r, _shape_strides);
-            }
-        } else { // this case is certainly the standard deviation
+        } else {
+            bool isStd = optype == NUMERICAL_STD ? 1 : 0;
             results = ndarray_new_dense_ndarray(MAX(1, ndarray->ndim-1), _shape_strides.shape, NDARRAY_FLOAT);
             // we can return the 0 array here, if the degrees of freedom is larger than the length of the axis
-            if(_shape_strides.shape[0] <= ddof) {
+            if((optype == NUMERICAL_STD) && (_shape_strides.shape[0] <= ddof)) {
                 return MP_OBJ_FROM_PTR(results);
             }
-            mp_float_t div = (mp_float_t)(_shape_strides.shape[0] - ddof);
-            mp_float_t *r = (mp_float_t *)results->array;
+            mp_float_t div = optype == NUMERICAL_STD ? (mp_float_t)(_shape_strides.shape[0] - ddof) : 0.0;
+            mp_float_t *rarray = (mp_float_t *)results->array;
             if(ndarray->dtype == NDARRAY_UINT8) {
-                RUN_STD(uint8_t, array, results, r, _shape_strides, div);
+                RUN_MEAN_STD(uint8_t, array, rarray, _shape_strides, div, isStd);
             } else if(ndarray->dtype == NDARRAY_INT8) {
-                RUN_STD(int8_t, array, results, r, _shape_strides, div);
+                RUN_MEAN_STD(int8_t, array, rarray, _shape_strides, div, isStd);
             } else if(ndarray->dtype == NDARRAY_UINT16) {
-                RUN_STD(uint16_t, array, results, r, _shape_strides, div);
+                RUN_MEAN_STD(uint16_t, array, rarray, _shape_strides, div, isStd);
             } else if(ndarray->dtype == NDARRAY_INT16) {
-                RUN_STD(int16_t, array, results, r, _shape_strides, div);
+                RUN_MEAN_STD(int16_t, array, rarray, _shape_strides, div, isStd);
             } else {
-                RUN_STD(mp_float_t, array, results, r, _shape_strides, div);
+                RUN_MEAN_STD(mp_float_t, array, rarray, _shape_strides, div, isStd);
             }
         }
         if(ndarray->ndim == 1) { // return a scalar here
diff --git a/code/numpy/numerical/numerical.h b/code/numpy/numerical/numerical.h
@@ -59,35 +59,46 @@
 
 // The mean could be calculated by simply dividing the sum by
 // the number of elements, but that method is numerically unstable
-#define RUN_MEAN1(type, array, results, r, ss)\
+#define RUN_MEAN1(type, array, rarray, ss) /* running mean of (ss).shape[0] elements read as `type` through `array`; the result is stored through `rarray` */\
 ({\
-    mp_float_t M, m;\
-    M = m = (mp_float_t)(*(type *)(array));\
-    for(size_t i=1; i < (ss).shape[0]; i++) {\
-        (array) += (ss).strides[0];\
+    mp_float_t M = 0.0; /* mean of the elements visited so far */\
+    for(size_t i=0; i < (ss).shape[0]; i++) {\
         mp_float_t value = (mp_float_t)(*(type *)(array));\
-        m = M + (value - M) / (mp_float_t)(i+1);\
-        M = m;\
+        M = M + (value - M) / (mp_float_t)(i+1); /* incremental update; no running sum is kept */\
+        (array) += (ss).strides[0]; /* advance the input pointer by (ss).strides[0] */\
     }\
-    (array) += (ss).strides[0];\
-    *(r)++ = M;\
+    *(rarray)++ = M; /* store the mean and advance the output pointer */\
 })
 
 // Instead of the straightforward implementation of the definition,
 // we take the numerically stable Welford algorithm here
 // https://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
-#define RUN_STD1(type, array, results, r, ss, div)\
+#define RUN_STD1(type, array, rarray, ss, div) /* one pass over (ss).shape[0] elements; stores sqrt(S / div) through `rarray`, S being the Welford accumulator */\
 ({\
-    mp_float_t M = 0.0, m = 0.0, S = 0.0, s = 0.0;\
+    mp_float_t M = 0.0, m = 0.0, S = 0.0; /* M: mean before the current element, m: mean including it, S: accumulator */\
     for(size_t i=0; i < (ss).shape[0]; i++) {\
         mp_float_t value = (mp_float_t)(*(type *)(array));\
         m = M + (value - M) / (mp_float_t)(i+1);\
-        s = S + (value - M) * (value - m);\
+        S = S + (value - M) * (value - m); /* uses the mean before (M) and after (m) including `value` */\
+        M = m;\
+        (array) += (ss).strides[0]; /* advance the input pointer by (ss).strides[0] */\
+    }\
+    *(rarray)++ = MICROPY_FLOAT_C_FUN(sqrt)(S / (div)); /* `div` is supplied by the caller; output pointer is advanced */\
+})
+
+#define RUN_MEAN_STD1(type, array, rarray, ss, div, isStd) /* one pass over (ss).shape[0] elements; stores sqrt(S / div) through `rarray` if isStd is true, the mean M otherwise */\
+({\
+    mp_float_t M = 0.0, m = 0.0, S = 0.0; /* M: mean before the current element, m: mean including it, S: accumulator */\
+    for(size_t i=0; i < (ss).shape[0]; i++) {\
+        mp_float_t value = (mp_float_t)(*(type *)(array));\
+        m = M + (value - M) / (mp_float_t)(i+1); /* mean including the current element */\
+        if(isStd) { /* S is read only in the isStd branch of the final store */\
+            S += (value - M) * (value - m);\
+        }\
         M = m;\
-        S = s;\
         (array) += (ss).strides[0];\
     }\
-    *(r)++ = MICROPY_FLOAT_C_FUN(sqrt)(s / (div));\
+    *(rarray)++ = isStd ? MICROPY_FLOAT_C_FUN(sqrt)(S / (div)) : M; /* S / (div) is evaluated only when isStd is true */\
 })
 
 #define RUN_DIFF1(ndarray, type, array, results, rarray, index, stencil, N)\
@@ -181,12 +192,16 @@
     RUN_SUM1(type, (array), (results), (rarray), (ss));\
 } while(0)
 
-#define RUN_MEAN(type, array, results, r, ss) do {\
-    RUN_MEAN1(type, (array), (results), (r), (ss));\
+#define RUN_MEAN(type, array, rarray, ss) do { /* no outer loop: a single RUN_MEAN1 pass */\
+    RUN_MEAN1(type, (array), (rarray), (ss));\
 } while(0)
 
-#define RUN_STD(type, array, results, r, ss, div) do {\
-    RUN_STD1(type, (array), (results), (r), (ss), (div));\
+#define RUN_STD(type, array, rarray, ss, div) do { /* no outer loop: a single RUN_STD1 pass */\
+    RUN_STD1(type, (array), (rarray), (ss), (div)); /* RUN_STD1 takes 5 arguments: type, array, rarray, ss, div */\
+} while(0)
+
+#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do { /* no outer loop: a single RUN_MEAN_STD1 pass */\
+    RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd)); /* RUN_MEAN_STD1 takes 6 arguments: type, array, rarray, ss, div, isStd */\
 } while(0)
 
 #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
@@ -218,26 +233,37 @@
     } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
 } while(0)
 
-#define RUN_MEAN(type, array, results, r, ss) do {\
+#define RUN_MEAN(type, array, rarray, ss) do { /* one RUN_MEAN1 pass per index l < (ss).shape[ULAB_MAX_DIMS - 1] */\
     size_t l = 0;\
     do {\
-        RUN_MEAN1(type, (array), (results), (r), (ss));\
+        RUN_MEAN1(type, (array), (rarray), (ss)); /* leaves `array` advanced by strides[0] * shape[0]; undone on the next line */\
         (array) -= (ss).strides[0] * (ss).shape[0];\
         (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
         l++;\
     } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
 } while(0)
 
-#define RUN_STD(type, array, results, r, ss, div) do {\
+#define RUN_STD(type, array, rarray, ss, div) do { /* one RUN_STD1 pass per index l < (ss).shape[ULAB_MAX_DIMS - 1] */\
     size_t l = 0;\
     do {\
-        RUN_STD1(type, (array), (results), (r), (ss), (div));\
+        RUN_STD1(type, (array), (rarray), (ss), (div)); /* leaves `array` advanced by strides[0] * shape[0]; undone on the next line */\
         (array) -= (ss).strides[0] * (ss).shape[0];\
         (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
         l++;\
     } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
 } while(0)
 
+#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do { /* one RUN_MEAN_STD1 pass per index l < (ss).shape[ULAB_MAX_DIMS - 1] */\
+    size_t l = 0;\
+    do {\
+        RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd)); /* writes one value through `rarray` and advances it */\
+        (array) -= (ss).strides[0] * (ss).shape[0]; /* undo the advance made inside RUN_MEAN_STD1 */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 1]; /* move to the start of the next pass */\
+        l++;\
+    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+
 #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
     size_t l = 0;\
     do {\
@@ -298,12 +324,28 @@
     } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
 } while(0)
 
-#define RUN_MEAN(type, array, results, r, ss) do {\
+#define RUN_MEAN(type, array, rarray, ss) do { /* RUN_MEAN1 for every (k, l) index pair over shape[ULAB_MAX_DIMS - 2] and shape[ULAB_MAX_DIMS - 1] */\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            RUN_MEAN1(type, (array), (rarray), (ss));\
+            (array) -= (ss).strides[0] * (ss).shape[0]; /* undo the advance made inside RUN_MEAN1 */\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS-1]; /* rewind the l loop before stepping k */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#define RUN_STD(type, array, rarray, ss, div) do {\
     size_t k = 0;\
     do {\
         size_t l = 0;\
         do {\
-            RUN_MEAN1(type, (array), (results), (r), (ss));\
+            RUN_STD1(type, (array), (rarray), (ss), (div));\
             (array) -= (ss).strides[0] * (ss).shape[0];\
             (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
             l++;\
@@ -314,12 +356,12 @@
     } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
 } while(0)
 
-#define RUN_STD(type, array, results, r, ss, div) do {\
+#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
     size_t k = 0;\
     do {\
         size_t l = 0;\
         do {\
-            RUN_STD1(type, (array), (results), (r), (ss), (div));\
+            RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\
             (array) -= (ss).strides[0] * (ss).shape[0];\
             (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
             l++;\
@@ -424,14 +466,36 @@
     } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
 } while(0)
 
-#define RUN_MEAN(type, array, results, r, ss) do {\
+#define RUN_MEAN(type, array, rarray, ss) do { /* RUN_MEAN1 for every (j, k, l) index triple over shape[ULAB_MAX_DIMS - 3 .. ULAB_MAX_DIMS - 1] */\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                RUN_MEAN1(type, (array), (rarray), (ss));\
+                (array) -= (ss).strides[0] * (ss).shape[0]; /* undo the advance made inside RUN_MEAN1 */\
+                (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS-1]; /* rewind the l loop before stepping k */\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS-2]; /* rewind the k loop before stepping j */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define RUN_STD(type, array, rarray, ss, div) do {\
     size_t j = 0;\
     do {\
         size_t k = 0;\
         do {\
             size_t l = 0;\
             do {\
-                RUN_MEAN1(type, (array), (results), (r), (ss));\
+                RUN_STD1(type, (array), (rarray), (ss), (div));\
                 (array) -= (ss).strides[0] * (ss).shape[0];\
                 (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
                 l++;\
@@ -446,14 +510,14 @@
     } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
 } while(0)
 
-#define RUN_STD(type, array, results, r, ss, div) do {\
+#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
     size_t j = 0;\
     do {\
         size_t k = 0;\
         do {\
             size_t l = 0;\
             do {\
-                RUN_STD1(type, (array), (results), (r), (ss), (div));\
+                RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\
                 (array) -= (ss).strides[0] * (ss).shape[0];\
                 (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
                 l++;\