Skip to content

Commit 4173756

Browse files
committed
Correlation matrix should work on data series with nils
1 parent 4e48849 commit 4173756

File tree

2 files changed

+30
-24
lines changed

2 files changed

+30
-24
lines changed

src/DataFrame-Math/DataPearsonCorrelationMethod.class.st

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,10 @@ DataPearsonCorrelationMethod class >> between: x and: y [
99
"Calculate the Pearson correlation coefficient between two data series"
1010

1111
| xDeviation yDeviation |
12+
x size = y size ifFalse: [ SizeMismatch signal: 'Correlation can not be calculated for two series of different size' ].
1213

13-
x size = y size ifFalse: [
14-
SizeMismatch signal: 'Correlation can not be calculated for two series of different size' ].
15-
16-
xDeviation := x - x average.
17-
yDeviation := y - y average.
14+
xDeviation := x replaceNilsWithZeros - x average.
15+
yDeviation := y replaceNilsWithZeros - y average.
1816

1917
^ (xDeviation * yDeviation) sum / ((xDeviation ** 2) sum * (yDeviation ** 2) sum) sqrt
2018
]

src/DataFrame-Tests/DataFrameStatsTest.class.st

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,28 +37,36 @@ DataFrameStatsTest >> testCorrelationMatrix [
3737

3838
| expectedCorrelationMatrix actualCorrelationMatrix |
3939
expectedCorrelationMatrix := DataFrame withRows:
40-
#( #( 1 0.311398 0.538922 0.454601 )
41-
#( 0.311398 1 -0.321281 -0.308023 )
42-
#( 0.538922 -0.321281 1 0.982956 )
43-
#( 0.454601 -0.308023 0.982956
44-
1 ) ).
45-
expectedCorrelationMatrix columnNames:
46-
#( sepalLength sepalWidth petalLength petalWidth ).
47-
expectedCorrelationMatrix rowNames:
48-
#( sepalLength sepalWidth petalLength petalWidth ).
40+
#( #( 1 0.311398 0.538922 0.454601 ) #( 0.311398 1 -0.321281 -0.308023 ) #( 0.538922 -0.321281 1 0.982956 )
41+
#( 0.454601 -0.308023 0.982956 1 ) ).
42+
expectedCorrelationMatrix columnNames: #( sepalLength sepalWidth petalLength petalWidth ).
43+
expectedCorrelationMatrix rowNames: #( sepalLength sepalWidth petalLength petalWidth ).
4944
actualCorrelationMatrix := df correlationMatrix.
50-
self
51-
assert: actualCorrelationMatrix rowNames
52-
equals: expectedCorrelationMatrix rowNames.
53-
self
54-
assert: actualCorrelationMatrix columnNames
55-
equals: expectedCorrelationMatrix columnNames.
45+
self assert: actualCorrelationMatrix rowNames equals: expectedCorrelationMatrix rowNames.
46+
self assert: actualCorrelationMatrix columnNames equals: expectedCorrelationMatrix columnNames.
5647

5748
1 to: actualCorrelationMatrix numberOfColumns do: [ :j |
58-
1 to: actualCorrelationMatrix numberOfRows do: [ :i |
59-
self
60-
assert: (actualCorrelationMatrix at: i at: j)
61-
closeTo: (expectedCorrelationMatrix at: i at: j) ] ]
49+
1 to: actualCorrelationMatrix numberOfRows do: [ :i | self assert: (actualCorrelationMatrix at: i at: j) closeTo: (expectedCorrelationMatrix at: i at: j) ] ]
50+
]
51+
52+
{ #category : #tests }
53+
DataFrameStatsTest >> testCorrelationMatrixWithNils [
54+
55+
| expectedCorrelationMatrix actualCorrelationMatrix |
56+
df := DataFrame withRows: #( #( 1 1 ) #( 2 nil ) #( nil 3 ) #( 4 4 ) ).
57+
df columnNames: #( dogs cats ).
58+
59+
expectedCorrelationMatrix := DataFrame withRows: #( #( 1 0.3207134902949093 ) #( 0.3207134902949093 1 ) ).
60+
expectedCorrelationMatrix columnNames: #( dogs cats ).
61+
expectedCorrelationMatrix rowNames: #( dogs cats ).
62+
63+
actualCorrelationMatrix := df correlationMatrix.
64+
65+
self assert: actualCorrelationMatrix rowNames equals: expectedCorrelationMatrix rowNames.
66+
self assert: actualCorrelationMatrix columnNames equals: expectedCorrelationMatrix columnNames.
67+
68+
1 to: actualCorrelationMatrix numberOfColumns do: [ :j |
69+
1 to: actualCorrelationMatrix numberOfRows do: [ :i | self assert: (actualCorrelationMatrix at: i at: j) closeTo: (expectedCorrelationMatrix at: i at: j) ] ]
6270
]
6371

6472
{ #category : #tests }

0 commit comments

Comments
 (0)