|
| 1 | +/** |
| 2 | + * @file simple_linear_regression.c |
| 3 | + * @brief Simple Linear Regression Algorithm implemented |
| 4 | + * @details |
| 5 | + * This file implements the Simple Linear Regression algorithm. |
| 6 | + * It calculates the intercept and slope that the model uses to predict. |
| 7 | + * |
| 8 | + * Note: |
| 9 | + * This is simple linear regression, also known as |
| 10 | + * single linear regression, which means that |
| 11 | + * this code only works for 1D arrays and can only be applied |
| 12 | + * to one column of the predictor variable (x). |
| 13 | + * |
| 14 | + * @author [KeyzarRasya](https://github.com/KeyzarRasya) |
| 15 | + */ |
| 16 | + |
| 17 | +#include <assert.h> /* assert */ |
| 18 | +#include <stdio.h> /* printf, perror */ |
| 19 | +#include <stdlib.h> /* exit, malloc */ |
| 20 | +#include <string.h> /* memcpy */ |
| 21 | +#include <math.h> /* fabs */ |
| 22 | + |
| 23 | +#define EPSILON 0.0001 /* limit of tolerance for testing prediction */ |
| 24 | + |
/*!
 * @enum data_types
 * Element type of the arrays handed to simple_linear_regression.
 * The calculation code uses it to decide how the stored data
 * must be read.
 */
enum data_types {
    TYPE_INT = 0,  /**< array elements are int */
    TYPE_FLOAT = 1 /**< array elements are float */
};
| 35 | + |
| 36 | + |
| 37 | +/*! @struct simple_linear_regression |
| 38 | + * a class to store predictor (x) and target (y) |
| 39 | + * variables and also for storing the result of |
| 40 | + * slope and intercept |
| 41 | + */ |
| 42 | +struct simple_linear_regression { |
| 43 | + void *predictor; /**< used to predict the target variables */ |
| 44 | + void *target; /**< known as (y) variables or the predicted values */ |
| 45 | + int size; /**< the length of predictor and target variables */ |
| 46 | + float intercept; /**< storing the result of intercept */ |
| 47 | + float slope; /**< storing the result of intercept */ |
| 48 | + enum data_types data_type; /**< the type data used for predictor variables */ |
| 49 | +}; |
| 50 | + |
| 51 | +/*! |
| 52 | + * Return the pointer of simple_linear_regression |
| 53 | + * struct with information passed in parameters |
| 54 | + * |
| 55 | + * @param predictor array of predictors variables (x) |
| 56 | + * @param target array of target variables (y) |
| 57 | + * @param data_type data type of predictor variables |
| 58 | + * @param size size of given predictor variables |
| 59 | +*/ |
| 60 | +struct simple_linear_regression *init_simple_linear_regression( |
| 61 | + void *predictor, void *target, enum data_types data_type, int size) { |
| 62 | + |
| 63 | + /* allocating memory for simple_linear_regression structi */ |
| 64 | + struct simple_linear_regression *regressor = |
| 65 | + (struct simple_linear_regression*) |
| 66 | + malloc(sizeof(struct simple_linear_regression)); |
| 67 | + |
| 68 | + if (!regressor) { |
| 69 | + perror("Failed to allocate memory"); |
| 70 | + return NULL; |
| 71 | + } /* if memory failed to be allocated */ |
| 72 | + |
| 73 | + size_t element_size = (data_type == TYPE_INT) ? sizeof(int) : sizeof(float); |
| 74 | + |
| 75 | + regressor->predictor = malloc(size * element_size); |
| 76 | + regressor->target = malloc(size * element_size); |
| 77 | + |
| 78 | + if (!regressor->predictor || !regressor->target) { |
| 79 | + perror("Failed to allocate memory"); |
| 80 | + return NULL; |
| 81 | + }/* if memory at predictor and target failed to be allocated */ |
| 82 | + |
| 83 | + memcpy(regressor->predictor, predictor, size * element_size); |
| 84 | + memcpy(regressor->target, target, size * element_size); |
| 85 | + |
| 86 | + regressor->data_type = data_type; |
| 87 | + regressor->size = size; |
| 88 | + regressor->intercept = 0.0; |
| 89 | + regressor->slope = 0.0; |
| 90 | + |
| 91 | + return regressor; |
| 92 | +} |
| 93 | + |
/*!
 * Return the sum of a float array.
 *
 * The running total is kept in double so that small addends are not
 * lost once the total becomes large; the result is rounded to float
 * once, at the end.
 *
 * @param arr array to be summed (may be NULL, which sums to 0)
 * @param size number of elements in arr; values <= 0 sum to 0
 * @return sum of the first `size` elements
 */
float sum_float(const float *arr, int size) {
    if (!arr) {
        return 0.0f;
    }

    double total = 0.0;

    for (int i = 0; i < size; i++) {
        total += arr[i];
    }

    return (float)total;
}
| 110 | + |
/*!
 * Return a newly allocated array holding the square of each element.
 *
 * @param arr the array to be squared
 * @param size number of elements in arr
 * @return heap array of `size` squared values that the caller must
 *         free(), or NULL if arr is NULL, size is not positive, or
 *         the allocation fails
 */
float *square_float(float *arr, int size) {
    if (!arr || size <= 0) {
        return NULL;
    }

    float *result = malloc((size_t)size * sizeof *result);
    if (!result) {
        return NULL; /* let the caller decide how to handle out-of-memory */
    }

    for (int i = 0; i < size; i++) {
        result[i] = arr[i] * arr[i];
    }

    return result;
}
| 126 | + |
/*!
 * Return a newly allocated array of the element-wise products
 * of predictor (x) and target (y).
 *
 * @param x predictor variables
 * @param y target variables
 * @param size number of elements in each array
 * @return heap array where element i is x[i] * y[i]; the caller must
 *         free() it. NULL if x or y is NULL, size is not positive,
 *         or the allocation fails
 */
float *calculate_x_times_y_float(float *x, float *y, int size) {
    if (!x || !y || size <= 0) {
        return NULL;
    }

    float *result = malloc((size_t)size * sizeof *result);
    if (!result) {
        return NULL; /* let the caller decide how to handle out-of-memory */
    }

    for (int i = 0; i < size; i++) {
        result[i] = x[i] * y[i];
    }

    return result;
}
| 144 | + |
/*!
 * Return the least-squares slope:
 *   (n * sum_xy - sum_x * sum_y) / (n * sum_x_square - sum_x * sum_x)
 *
 * The arithmetic is carried out in double. Both numerator and
 * denominator subtract two large, nearly equal products, and doing
 * that in single precision can give a visibly wrong slope.
 *
 * @param n length of predictor and target variables
 * @param sum_x sum of the x variables (predictor)
 * @param sum_xy sum of the products x * y
 * @param sum_y sum of the y variables (target)
 * @param sum_x_square sum of the squared x values
 * @return the slope, or 0 (after printing an error to stderr) when
 *         the denominator is zero and the slope is undefined
 */
float calculate_slope(int n, float sum_x, float sum_xy,
                      float sum_y, float sum_x_square) {
    const double count = (double)n;
    const double top = (count * sum_xy) - ((double)sum_x * sum_y);
    const double bottom = (count * sum_x_square) - ((double)sum_x * sum_x);

    if (bottom == 0.0) {
        fprintf(stderr, "\nError: Division by zero in slope calculation!\n");
        return 0.0f;
    } /* if bottom equals zero, mathematically the slope is undefined */

    return (float)(top / bottom);
}
| 166 | + |
/*!
 * Return the intercept of the regression line, i.e. the value that
 * makes the line pass through the point (x_mean, y_mean).
 *
 * @param x_mean mean of x variables (predictor)
 * @param y_mean mean of y variables (target)
 * @param slope slope of the regression line
 * @return y_mean - slope * x_mean
 */
float calculate_intercept(float x_mean, float y_mean, float slope) {
    const float rise_at_mean = slope * x_mean;

    return y_mean - rise_at_mean;
}
| 177 | + |
| 178 | +/*! |
| 179 | + * ---------Training------------- |
| 180 | + * 1. Checking the simple_linear_regression data types |
| 181 | + * 2. Calculating all the variables are needed |
| 182 | + * 3. Calculating slope and intercept |
| 183 | + * 4. store slope and intercept values to simple_linear_regression struct |
| 184 | + * |
| 185 | + * @param regressor struct of simple_linear_regression |
| 186 | + */ |
| 187 | +void train(struct simple_linear_regression *regressor) { |
| 188 | + if (regressor->data_type == TYPE_INT) { |
| 189 | + int *x = (int *)regressor->predictor; |
| 190 | + int *y = (int *)regressor->target; |
| 191 | + |
| 192 | + int sum_x = sum_float((float *)x, regressor->size); |
| 193 | + int sum_y = sum_float((float *)y, regressor->size); |
| 194 | + float *x_square = square_float((float *)x, regressor->size); |
| 195 | + float *x_times_y = calculate_x_times_y_float((float *)x, |
| 196 | + (float *)y, regressor->size); |
| 197 | + float sum_x_square = sum_float(x_square, regressor->size); |
| 198 | + float sum_xy = sum_float(x_times_y, regressor->size); |
| 199 | + |
| 200 | + float x_mean = (float)sum_x / regressor->size; |
| 201 | + float y_mean = (float)sum_y / regressor->size; |
| 202 | + |
| 203 | + regressor->slope = calculate_slope(regressor->size, sum_x, |
| 204 | + sum_xy, sum_y, sum_x_square); |
| 205 | + regressor->intercept = calculate_intercept(x_mean, |
| 206 | + y_mean, regressor->slope); |
| 207 | + |
| 208 | + free(x_square); |
| 209 | + free(x_times_y); |
| 210 | + } /*if simple_linear_regression data type is integer */ |
| 211 | + else { |
| 212 | + float *x = (float *)regressor->predictor; |
| 213 | + float *y = (float *)regressor->target; |
| 214 | + |
| 215 | + float sum_x = sum_float(x, regressor->size); |
| 216 | + float sum_y = sum_float(y, regressor->size); |
| 217 | + float *x_square = square_float(x, regressor->size); |
| 218 | + float *x_times_y = calculate_x_times_y_float(x, y, regressor->size); |
| 219 | + float sum_x_square = sum_float(x_square, regressor->size); |
| 220 | + float sum_xy = sum_float(x_times_y, regressor->size); |
| 221 | + |
| 222 | + float x_mean = sum_x / regressor->size; |
| 223 | + float y_mean = sum_y / regressor->size; |
| 224 | + |
| 225 | + regressor->slope = calculate_slope(regressor->size, sum_x, |
| 226 | + sum_xy, sum_y, sum_x_square); |
| 227 | + regressor->intercept = calculate_intercept(x_mean, y_mean, |
| 228 | + regressor->slope); |
| 229 | + |
| 230 | + free(x_square); |
| 231 | + free(x_times_y); |
| 232 | + } /* if simple_linear_regression data_type is float */ |
| 233 | +} |
| 234 | + |
| 235 | +/*! |
| 236 | + * Returning the result of simple_linear_regression prediction |
| 237 | + * |
| 238 | + * @param regressor used simple_linear_regression struct |
| 239 | + * @param pred value that want to be predicted |
| 240 | + */ |
| 241 | +float predict(struct simple_linear_regression *regressor, |
| 242 | + float pred) { |
| 243 | + return regressor->intercept + (regressor->slope * pred); |
| 244 | +} |
| 245 | + |
| 246 | +/*! |
| 247 | + * A function to test simple_linear_regression |
| 248 | + * prediction process |
| 249 | + * |
| 250 | + * x variables is product price |
| 251 | + * y variables is amount of quality sold |
| 252 | + */ |
| 253 | +void test(){ |
| 254 | + float x[] = {50000.0, 55000.0, 60000.0, 65000.0, 70000.0}; |
| 255 | + float y[] = {20.0, 18.0, 15.0, 12.0, 10.0}; |
| 256 | + |
| 257 | + struct simple_linear_regression *regressor = |
| 258 | + init_simple_linear_regression(x, y, TYPE_FLOAT, sizeof(x)/sizeof(float)); |
| 259 | + |
| 260 | + train(regressor); |
| 261 | + |
| 262 | + float pred = 75000.0; |
| 263 | + float prediction = predict(regressor, pred); |
| 264 | + |
| 265 | + printf("\nPredicted value for product price %.2f is %f\n", |
| 266 | + pred, prediction); |
| 267 | + |
| 268 | + assert(fabs(prediction - 7.200008) < EPSILON); |
| 269 | + printf("Assertion Passed\n"); |
| 270 | + free(regressor->predictor); |
| 271 | + free(regressor->target); |
| 272 | + free(regressor); |
| 273 | +} |
| 274 | + |
/*!
 * Program entry point: runs the self-test.
 *
 * @return 0 when the test completes
 */
int main(void) {
    test();
    return 0;
}
0 commit comments