|
| 1 | +#define PY_SSIZE_T_CLEAN |
| 2 | +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION |
| 3 | +#include <Python.h> // pull the python API into the current namespace |
| 4 | +#include <numpy/arrayobject.h> |
| 5 | +#include <numpy/npy_math.h> |
| 6 | +#include <math.h> |
| 7 | + |
| 8 | +// This code is adapted from https://github.com/dgoeries/lttbc |
| 9 | +// Most credits are due to https://github.com/dgoeries |
| 10 | + |
| 11 | +// method below assumes the x-delta's are equidistant |
| 12 | +static PyObject* downsample_return_index(PyObject *self, PyObject *args) { |
| 13 | + int threshold; |
| 14 | + PyObject *x_obj = NULL, *y_obj = NULL; |
| 15 | + PyArrayObject *x_array = NULL, *y_array = NULL; |
| 16 | + |
| 17 | + if (!PyArg_ParseTuple(args, "OOi", &x_obj, &y_obj, &threshold)) |
| 18 | + return NULL; |
| 19 | + |
| 20 | + if ((!PyArray_Check(x_obj) && !PyList_Check(x_obj)) || (!PyArray_Check(y_obj) && !PyList_Check(y_obj))) { |
| 21 | + PyErr_SetString(PyExc_TypeError, "Function requires x and y input to be of type list or ndarray ..."); |
| 22 | + goto fail; |
| 23 | + } |
| 24 | + |
| 25 | + |
| 26 | + // Interpret the input objects as numpy arrays, with reqs (contiguous, aligned, and writeable ...) |
| 27 | + x_array = (PyArrayObject *)PyArray_FROM_OTF(x_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); |
| 28 | + y_array = (PyArrayObject *)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); |
| 29 | + if (x_array == NULL || y_array == NULL) { |
| 30 | + goto fail; |
| 31 | + } |
| 32 | + |
| 33 | + if (PyArray_NDIM(x_array) != 1 || PyArray_NDIM(y_array) != 1) {; |
| 34 | + PyErr_SetString(PyExc_ValueError, "Both x and y must have a single dimension ..."); |
| 35 | + goto fail; |
| 36 | + } |
| 37 | + |
| 38 | + if (!PyArray_SAMESHAPE(x_array, y_array)) { |
| 39 | + PyErr_SetString(PyExc_ValueError, "Input x and y must have the same shape ..."); |
| 40 | + goto fail; |
| 41 | + } |
| 42 | + |
| 43 | + // Declare data length and check if we actually have to downsample! |
| 44 | + const Py_ssize_t data_length = (Py_ssize_t)PyArray_DIM(y_array, 0); |
| 45 | + if (threshold >= data_length || threshold <= 0) { |
| 46 | + // Nothing to do! |
| 47 | + PyObject *value = Py_BuildValue("O", x_array); |
| 48 | + Py_DECREF(x_array); |
| 49 | + Py_DECREF(y_array); |
| 50 | + return value; |
| 51 | + } |
| 52 | + |
| 53 | + // Access the data in the NDArray! |
| 54 | + double *y = (double*)PyArray_DATA(y_array); |
| 55 | + |
| 56 | + // Create an empty output array with shape and dim for the output! |
| 57 | + npy_intp dims[1]; |
| 58 | + dims[0] = threshold; |
| 59 | + PyArrayObject *sampled_x = (PyArrayObject *)PyArray_Empty(1, dims, |
| 60 | + PyArray_DescrFromType(NPY_INT64), 0); |
| 61 | + |
| 62 | + // Get a pointer to its data |
| 63 | + long long *sampled_x_data = (long long*)PyArray_DATA(sampled_x); |
| 64 | + |
| 65 | + // The main loop here! |
| 66 | + Py_ssize_t sampled_index = 0; |
| 67 | + const double every = (double)(data_length - 2) / (threshold - 2); |
| 68 | + |
| 69 | + Py_ssize_t a = 0; |
| 70 | + Py_ssize_t next_a = 0; |
| 71 | + |
| 72 | + // Always add the first point! |
| 73 | + sampled_x_data[sampled_index] = 0; |
| 74 | + |
| 75 | + sampled_index++; |
| 76 | + Py_ssize_t i; |
| 77 | + for (i = 0; i < threshold - 2; ++i) { |
| 78 | + // Calculate point average for next bucket (containing c) |
| 79 | + double avg_x = threshold; |
| 80 | + double avg_y = 0; |
| 81 | + Py_ssize_t avg_range_start = (Py_ssize_t)(floor((i + 1)* every) + 1); |
| 82 | + Py_ssize_t avg_range_end = (Py_ssize_t)(floor((i + 2) * every) + 1); |
| 83 | + if (avg_range_end >= data_length){ |
| 84 | + avg_range_end = data_length; |
| 85 | + } |
| 86 | + Py_ssize_t avg_range_length = avg_range_end - avg_range_start; |
| 87 | + |
| 88 | + for (;avg_range_start < avg_range_end; avg_range_start++){ |
| 89 | + avg_y += y[avg_range_start]; |
| 90 | + } |
| 91 | + avg_y /= avg_range_length; |
| 92 | + avg_x = avg_range_start + every / 2; |
| 93 | + |
| 94 | + // Get the range for this bucket |
| 95 | + Py_ssize_t range_offs = (Py_ssize_t)(floor((i + 0) * every) + 1); |
| 96 | + Py_ssize_t range_to = (Py_ssize_t)(floor((i + 1) * every) + 1); |
| 97 | + |
| 98 | + |
| 99 | + // Point a |
| 100 | + double point_a_y = y[a]; |
| 101 | + |
| 102 | + double max_area = -1.0; |
| 103 | + for (; range_offs < range_to; range_offs++){ |
| 104 | + // Calculate triangle area over three buckets |
| 105 | + double area = fabs((a - avg_x) * (y[range_offs] - point_a_y) - (a - range_offs) * (avg_y - point_a_y)) * 0.5; |
| 106 | + if (area > max_area){ |
| 107 | + max_area = area; |
| 108 | + next_a = range_offs; // Next a is this b |
| 109 | + } |
| 110 | + } |
| 111 | + // Pick this point from the bucket |
| 112 | + sampled_x_data[sampled_index] = next_a; |
| 113 | + |
| 114 | + sampled_index++; |
| 115 | + |
| 116 | + // Current a becomes the next_a (chosen b) |
| 117 | + a = next_a; |
| 118 | + } |
| 119 | + |
| 120 | + // Always add last! Check for finite values! |
| 121 | + sampled_x_data[sampled_index] = data_length - 1; |
| 122 | + |
| 123 | + // Provide our return value |
| 124 | + PyObject *value = Py_BuildValue("O", sampled_x); |
| 125 | + |
| 126 | + // And remove the references! |
| 127 | + Py_DECREF(x_array); |
| 128 | + Py_DECREF(y_array); |
| 129 | + Py_XDECREF(sampled_x); |
| 130 | + |
| 131 | + return value; |
| 132 | + |
| 133 | +fail: |
| 134 | + Py_DECREF(x_array); |
| 135 | + Py_XDECREF(y_array); |
| 136 | + return NULL; |
| 137 | +} |
| 138 | + |
| 139 | + |
| 140 | +// Method definition object |
| 141 | +static PyMethodDef lttbcv2Methods[] = { |
| 142 | + { |
| 143 | + "downsample_return_index", // The name of the method |
| 144 | + downsample_return_index, // Function pointer to the method implementation |
| 145 | + METH_VARARGS, |
| 146 | + "Compute the largest triangle three buckets (LTTB) algorithm in a C extension." |
| 147 | + }, |
| 148 | + {NULL, NULL, 0, NULL} /* Sentinel */ |
| 149 | +}; |
| 150 | + |
| 151 | +static struct PyModuleDef lttbc_module_definition = { |
| 152 | + PyModuleDef_HEAD_INIT, |
| 153 | + "lttbcv2", /* name of module */ |
| 154 | + "A Python module that computes the largest triangle three buckets algorithm (LTTB) using C code.", |
| 155 | + -1, /* size of per-interpreter state of the module, |
| 156 | + or -1 if the module keeps state in global variables. */ |
| 157 | + lttbcv2Methods |
| 158 | +}; |
| 159 | + |
| 160 | +// Module initialization |
| 161 | +PyMODINIT_FUNC PyInit_lttbcv2(void) { |
| 162 | + Py_Initialize(); |
| 163 | + import_array(); |
| 164 | + return PyModule_Create(<tbc_module_definition); |
| 165 | +} |
0 commit comments