Skip to content

Commit 98a709e

Browse files
authored
Merge pull request #184 from jecisc/add-way-to-categorize-by-bins
Add a way to categorize a data series by bins
2 parents 4c0586f + 0076fa0 commit 98a709e

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

src/DataFrame-Tests/DataSeriesTest.class.st

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,37 @@ DataSeriesTest >> testGroupByAggregateUsingSizeMismatch [
12481248
raise: SizeMismatch.
12491249
]
12501250

1251+
{ #category : #grouping }
1252+
DataSeriesTest >> testGroupByBins [
	"Without explicit labels, groupByBins: labels each interval with its 1-based position.
	series and keyArray are fixtures defined outside this method (presumably in setUp)."

	| binEdges grouped |
	binEdges := { 0 . 5 . 10 . 15 . Float infinity }.
	grouped := series groupByBins: binEdges.
	self
		assert: grouped
		equals: (DataSeries withKeys: keyArray values: #( 1 2 2 4 2 2 2 2 3 3 4 ) name: 'ExampleSeries')
]
1259+
1260+
{ #category : #grouping }
1261+
DataSeriesTest >> testGroupByBinsLabelled [
	"With explicit labels, each value of the series fixture is replaced by the label given to its bin.
	series and keyArray are fixtures defined outside this method (presumably in setUp)."

	| binEdges binLabels grouped |
	binEdges := { 0 . 5 . 10 . 15 . Float infinity }.
	binLabels := #( 0 5 10 15 ).
	grouped := series groupByBins: binEdges labelled: binLabels.
	self
		assert: grouped
		equals: (DataSeries withKeys: keyArray values: #( 0 5 5 15 5 5 5 5 10 10 15 ) name: 'ExampleSeries')
]
1270+
1271+
{ #category : #grouping }
1272+
DataSeriesTest >> testGroupByBinsLabelledWithSizeProblem [
	"Five bin boundaries delimit four intervals, so passing only three labels must signal SizeMismatch."

	| binEdges tooFewLabels |
	binEdges := { 0 . 5 . 10 . 15 . Float infinity }.
	tooFewLabels := #( 0 5 10 ).
	self
		should: [ series groupByBins: binEdges labelled: tooFewLabels ]
		raise: SizeMismatch
]
1281+
12511282
{ #category : #grouping }
12521283
DataSeriesTest >> testGroupByUniqueValuesAndAggregateUsing [
12531284

src/DataFrame/DataSeries.class.st

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,29 @@ DataSeries >> groupBy: otherSeries aggregateUsing: aBlock as: aNewName [
326326
^ self class withKeys: groupMap keys values: (groupMap values collect: aBlock) name: aNewName
327327
]
328328

329+
{ #category : #grouping }
330+
DataSeries >> groupByBins: bins [
	"Group my values by the intervals delimited by consecutive elements of bins.
	Each interval is labelled with its 1-based position; see groupByBins:labelled: for the grouping itself."

	| defaultLabels |
	defaultLabels := 1 to: bins size - 1.
	^ self groupByBins: bins labelled: defaultLabels
]
334+
335+
{ #category : #grouping }
336+
DataSeries >> groupByBins: bins labelled: aCollection [
	"Answer a new DataSeries in which each of my values is replaced by the label of the bin it falls into.

	- bins is a sequenceable collection of boundaries; each pair of consecutive elements delimits one interval, so n boundaries define n - 1 intervals.
	- aCollection holds one label per interval, in the same order. Labels may repeat, which lets several intervals share one label.

	Both bounds of an interval are inclusive (#between:and:). A value equal to a shared boundary therefore fits two intervals and receives the label of the first one.

	Signal SizeMismatch when aCollection does not hold exactly one label per interval. A value that fits no interval makes #detect: signal its usual not-found error."

	| intervalCount |
	bins size = (aCollection size + 1) ifFalse: [ SizeMismatch signal: 'The labels should have one less elements than the bins.' ].

	intervalCount := bins size - 1.

	"Resolve the label by interval position rather than through a dictionary keyed by label: with such a dictionary, a repeated label overwrites the interval stored by its earlier occurrence."
	^ self collect: [ :each |
		| position |
		position := (1 to: intervalCount) detect: [ :index | each between: (bins at: index) and: (bins at: index + 1) ].
		aCollection at: position ]
]
351+
329352
{ #category : #grouping }
330353
DataSeries >> groupByUniqueValuesAndAggregateUsing: aBlock [
331354
"Group my values by their unique values and aggregate them using aBlock. Use my name by default"

0 commit comments

Comments
 (0)