Skip to content

Commit e8143e2

Browse files
authored
Merge pull request #194 from jecisc/fix-various-bugs
Fix various bugs
2 parents f6723a9 + 0bc36d0 commit e8143e2

File tree

6 files changed

+156
-25
lines changed

6 files changed

+156
-25
lines changed

src/DataFrame-Math/DataPearsonCorrelationMethod.class.st

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,10 @@ DataPearsonCorrelationMethod class >> between: x and: y [
99
"Calculate the Pearson correlation coefficient between two data series"
1010

1111
| xDeviation yDeviation |
12+
x size = y size ifFalse: [ SizeMismatch signal: 'Correlation can not be calculated for two series of different size' ].
1213

13-
x size = y size ifFalse: [
14-
SizeMismatch signal: 'Correlation can not be calculated for two series of different size' ].
15-
16-
xDeviation := x - x average.
17-
yDeviation := y - y average.
14+
xDeviation := (x values replaceAll: nil with: 0) - x average.
15+
yDeviation := (y values replaceAll: nil with: 0) - y average.
1816

1917
^ (xDeviation * yDeviation) sum / ((xDeviation ** 2) sum * (yDeviation ** 2) sum) sqrt
2018
]

src/DataFrame-Tests/DataFrameStatsTest.class.st

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,33 +32,58 @@ DataFrameStatsTest >> testAverage [
3232
self assert: actual closeTo: expected
3333
]
3434

35+
{ #category : #tests }
36+
DataFrameStatsTest >> testAverageWithNils [
37+
38+
| expected actual |
39+
df := DataFrame withRows: #( #( 1 1 ) #( 2 nil ) #( nil 3 ) #( 4 4 ) ).
40+
df columnNames: #( dogs cats ).
41+
42+
expected := {
43+
(7 / 3).
44+
(8 / 3) } asDataSeries.
45+
expected name: #average.
46+
expected keys: df columnNames.
47+
48+
actual := df average.
49+
self assert: actual closeTo: expected
50+
]
51+
3552
{ #category : #tests }
3653
DataFrameStatsTest >> testCorrelationMatrix [
3754

3855
| expectedCorrelationMatrix actualCorrelationMatrix |
3956
expectedCorrelationMatrix := DataFrame withRows:
40-
#( #( 1 0.311398 0.538922 0.454601 )
41-
#( 0.311398 1 -0.321281 -0.308023 )
42-
#( 0.538922 -0.321281 1 0.982956 )
43-
#( 0.454601 -0.308023 0.982956
44-
1 ) ).
45-
expectedCorrelationMatrix columnNames:
46-
#( sepalLength sepalWidth petalLength petalWidth ).
47-
expectedCorrelationMatrix rowNames:
48-
#( sepalLength sepalWidth petalLength petalWidth ).
57+
#( #( 1 0.311398 0.538922 0.454601 ) #( 0.311398 1 -0.321281 -0.308023 ) #( 0.538922 -0.321281 1 0.982956 )
58+
#( 0.454601 -0.308023 0.982956 1 ) ).
59+
expectedCorrelationMatrix columnNames: #( sepalLength sepalWidth petalLength petalWidth ).
60+
expectedCorrelationMatrix rowNames: #( sepalLength sepalWidth petalLength petalWidth ).
4961
actualCorrelationMatrix := df correlationMatrix.
50-
self
51-
assert: actualCorrelationMatrix rowNames
52-
equals: expectedCorrelationMatrix rowNames.
53-
self
54-
assert: actualCorrelationMatrix columnNames
55-
equals: expectedCorrelationMatrix columnNames.
62+
self assert: actualCorrelationMatrix rowNames equals: expectedCorrelationMatrix rowNames.
63+
self assert: actualCorrelationMatrix columnNames equals: expectedCorrelationMatrix columnNames.
64+
65+
1 to: actualCorrelationMatrix numberOfColumns do: [ :j |
66+
1 to: actualCorrelationMatrix numberOfRows do: [ :i | self assert: (actualCorrelationMatrix at: i at: j) closeTo: (expectedCorrelationMatrix at: i at: j) ] ]
67+
]
68+
69+
{ #category : #tests }
70+
DataFrameStatsTest >> testCorrelationMatrixWithNils [
71+
72+
| expectedCorrelationMatrix actualCorrelationMatrix |
73+
df := DataFrame withRows: #( #( 1 1 ) #( 2 nil ) #( nil 3 ) #( 4 4 ) ).
74+
df columnNames: #( dogs cats ).
75+
76+
expectedCorrelationMatrix := DataFrame withRows: #( #( 1 0.4174555390689118 ) #( 0.4174555390689118 1 ) ).
77+
expectedCorrelationMatrix columnNames: #( dogs cats ).
78+
expectedCorrelationMatrix rowNames: #( dogs cats ).
79+
80+
actualCorrelationMatrix := df correlationMatrix.
81+
82+
self assert: actualCorrelationMatrix rowNames equals: expectedCorrelationMatrix rowNames.
83+
self assert: actualCorrelationMatrix columnNames equals: expectedCorrelationMatrix columnNames.
5684

5785
1 to: actualCorrelationMatrix numberOfColumns do: [ :j |
58-
1 to: actualCorrelationMatrix numberOfRows do: [ :i |
59-
self
60-
assert: (actualCorrelationMatrix at: i at: j)
61-
closeTo: (expectedCorrelationMatrix at: i at: j) ] ]
86+
1 to: actualCorrelationMatrix numberOfRows do: [ :i | self assert: (actualCorrelationMatrix at: i at: j) closeTo: (expectedCorrelationMatrix at: i at: j) ] ]
6287
]
6388

6489
{ #category : #tests }

src/DataFrame-Tests/DataFrameTest.class.st

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,33 @@ DataFrameTest >> testColumnsSubset [
898898
self assert: actualDataFrame equals: expectedDataFrame
899899
]
900900

901+
{ #category : #tests }
902+
DataFrameTest >> testCopy [
903+
904+
| copy |
905+
copy := df copy.
906+
907+
self assert: copy equals: df.
908+
self deny: copy identicalTo: df.
909+
910+
df addRow: #( 'Paris' 7 false ) named: 'D'.
911+
912+
self assert: df size equals: 4.
913+
self assert: copy size equals: 3
914+
]
915+
916+
{ #category : #tests }
917+
DataFrameTest >> testCopy2 [
918+
919+
| copy |
920+
copy := df copy.
921+
922+
df addColumn: #( false true true ) named: 'Like it'.
923+
924+
self assert: df numberOfColumns equals: 4.
925+
self assert: copy numberOfColumns equals: 3
926+
]
927+
901928
{ #category : #tests }
902929
DataFrameTest >> testCreateDataFrameWith3ColumnsAndNoRows [
903930
| dataFrame |

src/DataFrame-Tests/DataSeriesTest.class.st

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,18 @@ DataSeriesTest >> testAtTransformIfAbsent [
531531
self assert: exceptionBlockEvaluated
532532
]
533533

534+
{ #category : #'tests - arithmetic' }
535+
DataSeriesTest >> testAverage [
536+
537+
self assert: #( 1 2 3 4 ) asDataSeries average equals: 5 / 2
538+
]
539+
540+
{ #category : #'tests - arithmetic' }
541+
DataSeriesTest >> testAverageWithNils [
542+
543+
self assert: #( 2 nil 4 ) asDataSeries average equals: 3
544+
]
545+
534546
{ #category : #'tests - comparing' }
535547
DataSeriesTest >> testBooleanGreaterThanEqualFromScalar [
536548

@@ -981,6 +993,21 @@ DataSeriesTest >> testCollectWithNotNils [
981993
self assert: actual equals: expected
982994
]
983995

996+
{ #category : #running }
997+
DataSeriesTest >> testCopy [
998+
999+
| copy |
1000+
copy := series copy.
1001+
1002+
self assert: copy equals: series.
1003+
self deny: copy identicalTo: series.
1004+
1005+
series add: $l -> 30.
1006+
1007+
self assert: series size equals: 12.
1008+
self assert: copy size equals: 11
1009+
]
1010+
9841011
{ #category : #'tests - copying' }
9851012
DataSeriesTest >> testCopyCanBeChanged [
9861013

@@ -1392,6 +1419,19 @@ DataSeriesTest >> testInjectInto [
13921419
self assert: actual equals: expected
13931420
]
13941421

1422+
{ #category : #'tests - testing' }
1423+
DataSeriesTest >> testIsNumerical [
1424+
1425+
self assert: #( 1 2 3 ) asDataSeries isNumerical.
1426+
self deny: #( 1 2 '3' ) asDataSeries isNumerical
1427+
]
1428+
1429+
{ #category : #'tests - testing' }
1430+
DataSeriesTest >> testIsNumericalWithNils [
1431+
1432+
self assert: #( 1 nil 3 ) asDataSeries isNumerical
1433+
]
1434+
13951435
{ #category : #'tests - accessing' }
13961436
DataSeriesTest >> testLast [
13971437

@@ -2095,6 +2135,19 @@ DataSeriesTest >> testStatsZerothQuartileEqualsMin [
20952135
self assert: series zerothQuartile equals: series min
20962136
]
20972137

2138+
{ #category : #'tests - arithmetic' }
2139+
DataSeriesTest >> testSum [
2140+
2141+
self assert: #( 1 2 3 4 ) asDataSeries sum equals: 10
2142+
]
2143+
2144+
{ #category : #'tests - arithmetic' }
2145+
DataSeriesTest >> testSumWithNils [
2146+
2147+
self assert: #( 1 2 nil 4 ) asDataSeries sum equals: 7.
2148+
self assert: #( nil nil nil ) asDataSeries sum equals: 0
2149+
]
2150+
20982151
{ #category : #'tests - head/tail' }
20992152
DataSeriesTest >> testTail [
21002153
| expected actual |

src/DataFrame/DataFrame.class.st

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,16 @@ DataFrame >> outerJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [
14121412
^ outputDf
14131413
]
14141414

1415+
{ #category : #copying }
1416+
DataFrame >> postCopy [
1417+
1418+
super postCopy.
1419+
contents := contents copy.
1420+
rowNames := rowNames copy.
1421+
columnNames := columnNames copy.
1422+
dataTypes := dataTypes copy
1423+
]
1424+
14151425
{ #category : #printing }
14161426
DataFrame >> printOn: aStream [
14171427

src/DataFrame/DataSeries.class.st

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,13 @@ DataSeries >> atIndex: aNumber transform: aBlock [
160160
self at: key transform: aBlock
161161
]
162162

163+
{ #category : #information }
164+
DataSeries >> average [
165+
"Answer the average of the non-nil values; nils are neither summed nor counted"
166+
167+
^ (self values reject: #isNil) average
168+
]
169+
163170
{ #category : #'data-types' }
164171
DataSeries >> calculateDataType [
165172

@@ -420,7 +427,8 @@ DataSeries >> isCategorical [
420427

421428
{ #category : #'categorical-numerical' }
422429
DataSeries >> isNumerical [
423-
^ forcedIsNumerical ifNil: [self uniqueValues allSatisfy: [:each|each isNumber]]
430+
431+
^ forcedIsNumerical ifNil: [ (self uniqueValues copyWithout: nil) allSatisfy: [ :each | each isNumber ] ]
424432
]
425433

426434
{ #category : #testing }
@@ -659,6 +667,16 @@ DataSeries >> sortedDescending [
659667
^ self sorted: [ :a :b | a > b ]
660668
]
661669

670+
{ #category : #transformation }
671+
DataSeries >> sum [
672+
"Return the sum of the values of this series. Nil values are skipped, so a series containing only nils sums to 0."
673+
674+
| result |
675+
result := 0.
676+
self do: [ :each | each ifNotNil: [ result := result + each ] ].
677+
^ result
678+
]
679+
662680
{ #category : #statistics }
663681
DataSeries >> summary [
664682
| summary |

0 commit comments

Comments
 (0)