Skip to content

Commit 3ace3a4

Browse files
committed
0.2.0 Correlation
1 parent 2e46b6f commit 3ace3a4

File tree

10 files changed

+391
-67
lines changed

10 files changed

+391
-67
lines changed

libraries/Correlation/Correlation.cpp

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
//
22
// FILE: Correlation.cpp
33
// AUTHOR: Rob Tillaart
4-
// VERSION: 0.1.4
4+
// VERSION: 0.2.0
55
// PURPOSE: Arduino Library to determine correlation between X and Y dataset
66
//
77
// HISTORY:
8-
// 0.1.4 2021-08-26 improve performance calculate
8+
// 0.2.0 2021-08-26 Add flags to skip Rsquare and Esquare calculation
9+
// will improve performance calculate
10+
// fixed sign of R correlation coefficient
911
//
10-
// 0.1.3 2021-01-16 add size in constructor,
12+
// 0.1.4 2021-08-26 improve performance calculate
13+
// 0.1.3 2021-01-16 add size in constructor,
1114
// add statistical + debug functions
1215
// 0.1.2 2020-12-17 add arduino-CI + unit tests
1316
// + size() + getAvgX() + getAvgY()
@@ -21,7 +24,8 @@
2124

2225
Correlation::Correlation(uint8_t size)
2326
{
24-
_size = size;
27+
_size = 20;
28+
if (size > 0) _size = size;
2529
_x = (float *) malloc(_size * sizeof(float));
2630
_y = (float *) malloc(_size * sizeof(float));
2731
clear();
@@ -30,8 +34,8 @@ Correlation::Correlation(uint8_t size)
3034

3135
Correlation::~Correlation()
3236
{
33-
free(_x);
34-
free(_y);
37+
if (_x) free(_x);
38+
if (_y) free(_y);
3539
}
3640

3741

@@ -45,11 +49,13 @@ void Correlation::clear()
4549
_avgY = 0;
4650
_a = 0;
4751
_b = 0;
48-
_rSquare = 0;
52+
_r = 0;
4953
_sumErrorSquare = 0;
5054
_sumXiYi = 0;
5155
_sumXi2 = 0;
5256
_sumYi2 = 0;
57+
_doR2 = true;
58+
_doE2 = true;
5359
}
5460

5561

@@ -69,10 +75,10 @@ bool Correlation::add(float x, float y)
6975
}
7076

7177

72-
bool Correlation::calculate()
78+
bool Correlation::calculate(bool forced)
7379
{
7480
if (_count == 0) return false;
75-
if (!_needRecalculate) return true;
81+
if (! (_needRecalculate || forced)) return true;
7682

7783
// CALC AVERAGE X, AVERAGE Y
7884
float avgx = 0;
@@ -84,7 +90,7 @@ bool Correlation::calculate()
8490
}
8591
avgx /= _count;
8692
avgy /= _count;
87-
93+
8894
_avgX = avgx;
8995
_avgY = avgy;
9096

@@ -102,25 +108,31 @@ bool Correlation::calculate()
102108
}
103109
float b = sumXiYi / sumXi2;
104110
float a = avgy - b * avgx;
105-
// bool CORLIB_CALC_R_SQUARE
106-
_rSquare = sumXiYi * sumXiYi / (sumXi2 * sumYi2);
107-
111+
108112
_a = a;
109113
_b = b;
110-
_sumXiYi = sumXiYi;
111-
_sumXi2 = sumXi2;
112-
_sumYi2 = sumYi2;
114+
_sumXiYi = sumXiYi;
115+
_sumXi2 = sumXi2;
116+
_sumYi2 = sumYi2;
113117

114-
// bool CORLIB_CALC_E_SQUARE
115-
// CALC _sumErrorSquare
116-
float sumErrorSquare = 0;
117-
for (uint8_t i = 0; i < _count; i++)
118+
if (_doR2 == true)
119+
{
120+
// R is calculated instead of rSquared so we do not lose the sign.
121+
// Rsquare from R is much faster than R from Rsquare.
122+
_r = sumXiYi / sqrt(sumXi2 * sumYi2);
123+
}
124+
125+
if (_doE2 == true)
118126
{
119-
float EY = a + b * _x[i];
120-
float ei = _y[i] - EY;
121-
sumErrorSquare += (ei * ei);
127+
float sumErrorSquare = 0;
128+
for (uint8_t i = 0; i < _count; i++)
129+
{
130+
float EY = a + b * _x[i];
131+
float ei = _y[i] - EY;
132+
sumErrorSquare += (ei * ei);
133+
}
134+
_sumErrorSquare = sumErrorSquare;
122135
}
123-
_sumErrorSquare = sumErrorSquare;
124136
_needRecalculate = false;
125137
return true;
126138
}

libraries/Correlation/Correlation.h

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// FILE: Correlation.h
44
// AUTHOR: Rob Tillaart
5-
// VERSION: 0.1.4
5+
// VERSION: 0.2.0
66
// PURPOSE: Calculate Correlation from a small dataset.
77
// HISTORY: See Correlation.cpp
88
//
@@ -11,7 +11,7 @@
1111
#include "Arduino.h"
1212

1313

14-
#define CORRELATION_LIB_VERSION (F("0.1.4"))
14+
#define CORRELATION_LIB_VERSION (F("0.2.0"))
1515

1616

1717
class Correlation
@@ -20,42 +20,65 @@ class Correlation
2020
Correlation(uint8_t size = 20); // WARNING calculate memory usage !!
2121
~Correlation();
2222

23-
// returns true if the value is added to internal array.
23+
// returns true if the pair of values is added to internal array.
2424
// returns false when internal array is full.
2525
bool add(float x, float y);
2626

27+
// administrative functions
2728
uint8_t count() { return _count; };
2829
uint8_t size() { return _size; };
2930
void clear();
3031

31-
// in running mode, adding new values will replace old ones
32-
// this constantly adapts the regression params A and B.
32+
33+
// in running mode, adding new pair of values will replace old ones
34+
// this constantly adapts the regression parameters A and B (iff calculate is called)
3335
void setRunningCorrelation(bool rc) { _runningMode = rc; };
3436
bool getRunningCorrelation() { return _runningMode; };
3537

36-
// worker, to calculate the correlation params.
37-
// MUST be called before getting the params A, B, R, Rsquare, Esquare,
38-
// avgX and avgY
38+
39+
// worker, to calculate the correlation parameters.
40+
// MUST be called before retrieving the parameters
41+
// A, B, R, Rsquare, Esquare, avgX and avgY
42+
//
43+
// parameter forced overrules the _needRecalculate flag.
44+
// forced is default false to maintain backwards compatibility
45+
//
3946
// returns false if contains no elements ==> count() == 0
40-
bool calculate();
47+
bool calculate(bool forced = false);
48+
// enables / disables R, Rsquare and Esquare calculation
49+
// This can be used to speed up the calculate function if
50+
// these values are not used in your project.
51+
void setR2Calculation(bool doR2) { _doR2 = doR2; };
52+
bool getR2Calculation() { return _doR2; };
53+
void setE2Calculation(bool doE2) { _doE2 = doE2; };
54+
bool getE2Calculation() { return _doE2; };
55+
4156

4257
// Y = A + B * X
58+
// note if no elements are added or calculate is not called
59+
// the values for A and B are 0
4360
float getA() { return _a; };
4461
float getB() { return _b; };
4562

46-
// returns R == correlation coefficient
47-
float getR() { return sqrt(_rSquare); };
48-
float getRsquare() { return _rSquare; };
49-
50-
// returns sum of the errors squared
63+
64+
// getR() returns correlation coefficient (0.2.0 fixed sign)
65+
float getR() { return _r; };
66+
float getRsquare() { return _r * _r; };
67+
68+
69+
// returns sum of the errors squared == indication of 'spread'
70+
// the smaller this value the more the points are on/near one line.
5171
float getEsquare() { return _sumErrorSquare; };
5272

53-
// get the average values of the datasets (as it is available)
73+
74+
// get the average values of the datasets (if count > 0)
5475
float getAvgX() { return _avgX; };
5576
float getAvgY() { return _avgY; };
56-
77+
78+
5779
// based on the dataset get the estimated values for X and Y
58-
// library does not return confidence interval for these.
80+
// it uses the last calculated A and B
81+
// library does not return a confidence interval for these values.
5982
float getEstimateY(float x);
6083
float getEstimateX(float y);
6184

@@ -73,6 +96,7 @@ class Correlation
7396
bool setY(uint8_t idx, float y); // returns true if succeeded
7497
float getX(uint8_t idx); // idem
7598
float getY(uint8_t idx); // idem
99+
76100
float getSumXiYi() { return _sumXiYi; };
77101
float getSumXi2() { return _sumXi2; };
78102
float getSumYi2() { return _sumYi2; };
@@ -84,6 +108,8 @@ class Correlation
84108
uint8_t _count = 0;
85109
bool _runningMode = false;
86110
bool _needRecalculate = true;
111+
bool _doE2 = true;
112+
bool _doR2 = true;
87113

88114
float * _x;
89115
float * _y;
@@ -92,7 +118,7 @@ class Correlation
92118
float _avgY;
93119
float _a;
94120
float _b;
95-
float _rSquare;
121+
float _r;
96122
float _sumErrorSquare;
97123
float _sumXiYi;
98124
float _sumXi2;

libraries/Correlation/README.md

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,15 @@ This library calculates the coefficients of the linear correlation
1717
between two (relative small) datasets. The size of these datasets is
1818
20 by default. The size can be set in the constructor.
1919

20-
The formula of the correlation is expressed as **Y = A + B \* X**,
21-
2220
Please note that the correlation uses about ~50 bytes per instance,
2321
and 2 floats == 8 bytes per pair of elements.
2422
So ~120 elements will use up 50% of the RAM of an UNO.
2523

24+
The formula of the correlation is expressed as **Y = A + B \* X**,
25+
26+
If all points are on a vertical line, the parameter B will be NAN.
27+
This will happen if the **sumXi2** is zero or very small.
28+
2629
Use with care.
2730

2831

@@ -31,47 +34,75 @@ Use with care.
3134

3235
### Constructor
3336

34-
- **Correlation(uint8_t size = 20)** allocates the array needed and resets internal admin.
37+
- **Correlation(uint8_t size = 20)** allocates the array needed and resets internal admin. Size should be between 1 and 255. Size = 0 will set the size to 20.
3538
- **~Correlation()** frees the allocated arrays.
3639

3740

3841
### Base functions
3942

40-
- **bool add(float x, float y)** adds a pair of **floats** to the internal storage.
43+
- **bool add(float x, float y)** adds a pair of **floats** to the internal storage arrays.
4144
Returns true if the value is added, returns false when internal array is full.
42-
When running correlation is set, it will replace the oldest element and return true.
43-
- **uint8_t count()** returns the amount of items in the internal arrays.
45+
When running correlation is set, **add()** will replace the oldest element and return true.
46+
Warning: **add()** does not check if the floats are NAN or INFINITE.
47+
- **uint8_t count()** returns the amount of items in the internal arrays.
48+
This number is always in the range 0 .. **size()**.
4449
- **uint8_t size()** returns the size of the internal arrays.
45-
- **void clear()** resets the datastructure to start condition (zero elements added)
50+
- **void clear()** resets the data structures to the start condition (zero elements added)
4651
- **bool calculate(bool forced = false)** does the math to calculate the correlation parameters A, B and R.
4752
This function will be called automatically when needed.
4853
You can call it on a more convenient time.
4954
Returns false if nothing to calculate **count == 0**
55+
- **void setR2Calculation(bool)** enables / disables the calculation of R and Rsquared.
56+
- **bool getR2Calculation()** returns the flag set.
57+
- **void setE2Calculation(bool)** enables / disables the calculation of Esquared.
58+
- **bool getE2Calculation()** returns the flag set.
59+
60+
After the calculation the following functions can be called to return the core values.
5061
- **float getA()** returns the A parameter of formula **Y = A + B \* X**
5162
- **float getB()** returns the B parameter of formula **Y = A + B \* X**
52-
- **float getR()** returns the correlation coefficient R.
63+
- **float getR()** returns the correlation coefficient R which is always between -1 .. 1
5364
The closer to 0 the less correlation there is between X and Y.
5465
Correlation can be positive or negative.
55-
Most often the R squared **sqr(R)** is used.
56-
- **float getRsquare()** returns the **sqr(R)** which is always between 0.. 1.
66+
Most often the Rsquare **R x R** is used.
67+
- **float getRsquare()** returns **R x R** which is always between 0.. 1.
5768
- **float getEsquare()** returns the error squared to get an indication of the
58-
quality of the relation.
69+
quality of the correlation.
5970
- **float getAvgX()** returns the average of all elements in the X dataset.
6071
- **float getAvgY()** returns the average of all elements in the Y dataset.
6172
- **float getEstimateX(float y)** use to calculate the estimated X for a given Y.
6273
- **float getEstimateY(float x)** use to calculate the estimated Y for a given X.
6374

6475

76+
#### Correlation Coefficient R
77+
78+
Indicative description of the correlation
79+
80+
| R | correlation |
81+
|:-------------:|:--------------|
82+
| +1.0 | Perfect |
83+
| +0.8 to +1.0 | Very strong |
84+
| +0.6 to +0.8 | Strong |
85+
| +0.4 to +0.6 | Moderate |
86+
| +0.2 to +0.4 | Weak |
87+
| 0.0 to +0.2 | Very weak |
88+
| 0.0 to -0.2 | Very weak |
89+
| -0.2 to -0.4 | Weak |
90+
| -0.4 to -0.6 | Moderate |
91+
| -0.6 to -0.8 | Strong |
92+
| -0.8 to -1.0 | Very strong |
93+
| -1.0 | Perfect |
94+
95+
6596
### Running correlation
6697

6798
- **void setRunningCorrelation(bool rc)** sets the internal variable
6899
runningMode which allows **add()** to overwrite old elements in the
69100
internal arrays.
70-
- **bool getRunningCorrelation()** returns the runningMOde flag.
101+
- **bool getRunningCorrelation()** returns the runningMode flag.
71102

72-
The running correlation will be calculated over the last **count** elements.
73-
This allows for more adaptive formula finding e.g. find the relation between
74-
temperature and humidity per hour.
103+
The running correlation will be calculated over the last **count** elements. If the array is full, count will be size.
104+
This running correlation allows for more adaptive formula finding e.g. find the relation between
105+
temperature and humidity per hour, and how it changes over time.
75106

76107

77108
### Statistical
@@ -89,10 +120,10 @@ It also depends on **R** of course. Idem for **getEstimateY()**
89120

90121
### Debugging / educational
91122

92-
Normally not used.
123+
Normally not used. For all these functions idx should be < count!
93124

94125
- **bool setXY(uint8_t idx, float x, float y)** overwrites a pair of values.
95-
Returns true if succeeded, idx should be < count!
126+
Returns true if succeeded.
96127
- **bool setX(uint8_t idx, float x)** overwrites single X.
97128
- **bool setY(uint8_t idx, float y)** overwrites single Y.
98129
- **float getX(uint8_t idx)** returns single value.
@@ -106,7 +137,7 @@ Returns true if succeeded, idx should be < count!
106137

107138
- Template version
108139
The constructor should get a TYPE parameter, as this
109-
allows smaller datatypes to be analyzed taking less memory.
140+
allows smaller data types to be analysed taking less memory.
110141

111142

112143
## Operation

0 commit comments

Comments
 (0)