Skip to content

Commit 4303ca4

Browse files
committed
feat: adding new machine learning algorithm
1 parent e5dad3f commit 4303ca4

File tree

1 file changed

+282
-0
lines changed

1 file changed

+282
-0
lines changed
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
/**
2+
* @file simple_linear_regression.c
3+
* @brief Simple Linear Regression Algorithm implemented
4+
* @details
5+
* This file implements the Simple Linear Regression algorithm:
6+
* it calculates the intercept and slope that the model uses to predict
7+
*
8+
* Note:
9+
* this is a simple (single-variable) linear regression,
10+
* i.e. a regression with exactly one predictor variable;
11+
* this code only works for 1D arrays and can only be applied
12+
* to one column of the predictor variable (x)
13+
*
14+
* @author [KeyzarRasya](https://github.com/KeyzarRasya)
15+
*/
16+
17+
#include <assert.h> /* assert */
18+
#include <stdio.h> /* printf, perror */
19+
#include <stdlib.h> /* exit, malloc */
20+
#include <string.h> /* memcpy */
21+
#include <math.h> /* fabs */
22+
23+
/* Maximum absolute difference tolerated between a prediction and its
 * expected value when the result is checked in test(). */
#define EPSILON 0.0001
24+
25+
/*!
 * @enum data_types
 * Element type of the sample arrays handed to
 * init_simple_linear_regression(). It selects the element size used to
 * copy the raw `void *` buffers and how train() reads them back.
 */
enum data_types {
    TYPE_INT,  /**< samples are stored as `int` */
    TYPE_FLOAT /**< samples are stored as `float` */
};
35+
36+
37+
/*! @struct simple_linear_regression
38+
* a class to store predictor (x) and target (y)
39+
* variables and also for storing the result of
40+
* slope and intercept
41+
*/
42+
struct simple_linear_regression {
43+
void *predictor; /**< used to predict the target variables */
44+
void *target; /**< known as (y) variables or the predicted values */
45+
int size; /**< the length of predictor and target variables */
46+
float intercept; /**< storing the result of intercept */
47+
float slope; /**< storing the result of intercept */
48+
enum data_types data_type; /**< the type data used for predictor variables */
49+
};
50+
51+
/*!
52+
* Return the pointer of simple_linear_regression
53+
* struct with information passed in parameters
54+
*
55+
* @param predictor array of predictors variables (x)
56+
* @param target array of target variables (y)
57+
* @param data_type data type of predictor variables
58+
* @param size size of given predictor variables
59+
*/
60+
struct simple_linear_regression *init_simple_linear_regression(
61+
void *predictor, void *target, enum data_types data_type, int size) {
62+
63+
/* allocating memory for simple_linear_regression structi */
64+
struct simple_linear_regression *regressor =
65+
(struct simple_linear_regression*)
66+
malloc(sizeof(struct simple_linear_regression));
67+
68+
if (!regressor) {
69+
perror("Failed to allocate memory");
70+
return NULL;
71+
} /* if memory failed to be allocated */
72+
73+
size_t element_size = (data_type == TYPE_INT) ? sizeof(int) : sizeof(float);
74+
75+
regressor->predictor = malloc(size * element_size);
76+
regressor->target = malloc(size * element_size);
77+
78+
if (!regressor->predictor || !regressor->target) {
79+
perror("Failed to allocate memory");
80+
return NULL;
81+
}/* if memory at predictor and target failed to be allocated */
82+
83+
memcpy(regressor->predictor, predictor, size * element_size);
84+
memcpy(regressor->target, target, size * element_size);
85+
86+
regressor->data_type = data_type;
87+
regressor->size = size;
88+
regressor->intercept = 0.0;
89+
regressor->slope = 0.0;
90+
91+
return regressor;
92+
}
93+
94+
/*!
 * Return the sum of the elements of a float array.
 *
 * The running total is kept in double precision so that small addends
 * are not lost against a large partial sum; only the final result is
 * rounded to float.
 *
 * @param arr array to be summed
 * @param size number of elements in the array; a size of 0 or less
 *             yields 0
 * @return the sum of the first `size` elements
 */
float sum_float(const float *arr, int size) {
    double total = 0.0;

    for (int i = 0; i < size; i++) {
        total += arr[i];
    }

    return (float)total;
}
110+
111+
/*!
 * Return a newly allocated array holding the square of every element.
 *
 * The caller owns the returned array and must release it with free().
 *
 * @param arr the array to be squared
 * @param size number of elements in the array
 * @return array of `size` squared values, or NULL when `arr` is NULL,
 *         `size` is not positive, or memory could not be allocated
 */
float *square_float(const float *arr, int size) {
    if (!arr || size <= 0) {
        return NULL;
    }

    float *result = malloc((size_t)size * sizeof *result);

    if (!result) {
        return NULL;
    } /* if memory failed to be allocated */

    for (int i = 0; i < size; i++) {
        result[i] = arr[i] * arr[i];
    }

    return result;
}
126+
127+
/*!
 * Return a newly allocated array holding the element-wise product of
 * predictor (x) and target (y).
 *
 * The caller owns the returned array and must release it with free().
 *
 * @param x predictor variables
 * @param y target variables
 * @param size number of elements in each array
 * @return array of `size` products, or NULL when an array is NULL,
 *         `size` is not positive, or memory could not be allocated
 */
float *calculate_x_times_y_float(const float *x, const float *y, int size) {
    if (!x || !y || size <= 0) {
        return NULL;
    }

    float *result = malloc((size_t)size * sizeof *result);

    if (!result) {
        return NULL;
    } /* if memory failed to be allocated */

    for (int i = 0; i < size; i++) {
        result[i] = x[i] * y[i];
    }

    return result;
}
144+
145+
/*!
 * Return the least-squares slope
 * `(n * sum_xy - sum_x * sum_y) / (n * sum_x_square - sum_x * sum_x)`.
 *
 * Numerator and denominator are differences of large, nearly equal
 * terms, so they are evaluated in double precision and only the final
 * quotient is rounded to float.
 *
 * @param n length of predictor and target variables
 * @param sum_x result sum of x variables (predictor)
 * @param sum_xy the result of summed x_times_y
 * @param sum_y result sum of y variables (target)
 * @param sum_x_square the result of summed x squared
 * @return the slope, or 0 (after printing an error) when the
 *         denominator is zero, e.g. when every x is identical
 */
float calculate_slope(int n, float sum_x, float sum_xy,
                      float sum_y, float sum_x_square) {
    const double top = ((double)n * sum_xy) - ((double)sum_x * sum_y);
    const double bottom =
        ((double)n * sum_x_square) - ((double)sum_x * sum_x);

    if (bottom == 0) {
        printf("\nError: Division by zero in slope calculation!\n");
        return 0;
    } /* if bottom equals zero, the slope is mathematically undefined */

    return (float)(top / bottom);
}
166+
167+
/*!
 * Return the intercept of the regression line,
 * `y_mean - slope * x_mean`.
 *
 * The expression is evaluated in double precision so that it is
 * rounded to float once, at the end.
 *
 * @param x_mean mean of x variables (predictor)
 * @param y_mean mean of y variables (target)
 * @param slope slope values
 * @return the intercept
 */
float calculate_intercept(float x_mean, float y_mean, float slope) {
    return (float)((double)y_mean - ((double)slope * (double)x_mean));
}
177+
178+
/*!
179+
* ---------Training-------------
180+
* 1. Checking the simple_linear_regression data types
181+
* 2. Calculating all the variables are needed
182+
* 3. Calculating slope and intercept
183+
* 4. store slope and intercept values to simple_linear_regression struct
184+
*
185+
* @param regressor struct of simple_linear_regression
186+
*/
187+
void train(struct simple_linear_regression *regressor) {
188+
if (regressor->data_type == TYPE_INT) {
189+
int *x = (int *)regressor->predictor;
190+
int *y = (int *)regressor->target;
191+
192+
int sum_x = sum_float((float *)x, regressor->size);
193+
int sum_y = sum_float((float *)y, regressor->size);
194+
float *x_square = square_float((float *)x, regressor->size);
195+
float *x_times_y = calculate_x_times_y_float((float *)x,
196+
(float *)y, regressor->size);
197+
float sum_x_square = sum_float(x_square, regressor->size);
198+
float sum_xy = sum_float(x_times_y, regressor->size);
199+
200+
float x_mean = (float)sum_x / regressor->size;
201+
float y_mean = (float)sum_y / regressor->size;
202+
203+
regressor->slope = calculate_slope(regressor->size, sum_x,
204+
sum_xy, sum_y, sum_x_square);
205+
regressor->intercept = calculate_intercept(x_mean,
206+
y_mean, regressor->slope);
207+
208+
free(x_square);
209+
free(x_times_y);
210+
} /*if simple_linear_regression data type is integer */
211+
else {
212+
float *x = (float *)regressor->predictor;
213+
float *y = (float *)regressor->target;
214+
215+
float sum_x = sum_float(x, regressor->size);
216+
float sum_y = sum_float(y, regressor->size);
217+
float *x_square = square_float(x, regressor->size);
218+
float *x_times_y = calculate_x_times_y_float(x, y, regressor->size);
219+
float sum_x_square = sum_float(x_square, regressor->size);
220+
float sum_xy = sum_float(x_times_y, regressor->size);
221+
222+
float x_mean = sum_x / regressor->size;
223+
float y_mean = sum_y / regressor->size;
224+
225+
regressor->slope = calculate_slope(regressor->size, sum_x,
226+
sum_xy, sum_y, sum_x_square);
227+
regressor->intercept = calculate_intercept(x_mean, y_mean,
228+
regressor->slope);
229+
230+
free(x_square);
231+
free(x_times_y);
232+
} /* if simple_linear_regression data_type is float */
233+
}
234+
235+
/*!
236+
* Returning the result of simple_linear_regression prediction
237+
*
238+
* @param regressor used simple_linear_regression struct
239+
* @param pred value that want to be predicted
240+
*/
241+
float predict(struct simple_linear_regression *regressor,
242+
float pred) {
243+
return regressor->intercept + (regressor->slope * pred);
244+
}
245+
246+
/*!
247+
* A function to test simple_linear_regression
248+
* prediction process
249+
*
250+
* x variables is product price
251+
* y variables is amount of quality sold
252+
*/
253+
void test(){
254+
float x[] = {50000.0, 55000.0, 60000.0, 65000.0, 70000.0};
255+
float y[] = {20.0, 18.0, 15.0, 12.0, 10.0};
256+
257+
struct simple_linear_regression *regressor =
258+
init_simple_linear_regression(x, y, TYPE_FLOAT, sizeof(x)/sizeof(float));
259+
260+
train(regressor);
261+
262+
float pred = 75000.0;
263+
float prediction = predict(regressor, pred);
264+
265+
printf("\nPredicted value for product price %.2f is %f\n",
266+
pred, prediction);
267+
268+
assert(fabs(prediction - 7.200008) < EPSILON);
269+
printf("Assertion Passed\n");
270+
free(regressor->predictor);
271+
free(regressor->target);
272+
free(regressor);
273+
}
274+
275+
/*!
 * main function to call the
 * test functions
 *
 * @return 0 on exit
 */
int main(void) {
    test();
    return 0;
}

0 commit comments

Comments
 (0)