Skip to content

Commit 4bff346

Browse files
author
Sterling Paramore
committed
Merge pull request #8 from inside-track/Aggregate
Added support for generic aggregation in dataframes.
2 parents 7380900 + 2169b5b commit 4bff346

File tree

15 files changed

+152
-47
lines changed

15 files changed

+152
-47
lines changed

Gemfile.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
PATH
22
remote: .
33
specs:
4-
remi (0.2.1)
4+
remi (0.2.3)
55
activesupport (~> 4.2)
66
bond (~> 0.5)
77
cucumber (~> 2.1)

lib/remi.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
require 'active_support/core_ext/object/try'
1919
require 'active_support/core_ext/object/inclusion'
2020
require 'active_support/core_ext/string/inflections'
21+
require 'active_support/core_ext/string/strip'
22+
require 'active_support/core_ext/string/filters'
2123
require 'active_support/core_ext/numeric/time'
2224
require 'active_support/core_ext/numeric/conversions'
2325
require 'active_support/core_ext/date/calculations'
@@ -30,7 +32,6 @@
3032

3133
# Remi
3234
require 'remi/version.rb'
33-
require 'remi/core/string.rb'
3435

3536
require 'remi/settings'
3637
require 'remi/job'
@@ -39,8 +40,8 @@
3940
require 'remi/data_subject'
4041
require 'remi/sf_bulk_helper' # separate into SF support package
4142

42-
require 'remi/core/daru'
43-
require 'remi/core/refinements'
43+
require 'remi/refinements/symbolizer'
44+
require 'remi/refinements/daru'
4445

4546
require 'remi/extractor/sftp_file'
4647

lib/remi/cli.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def parse(args = ARGV)
1515
options = {}
1616

1717
opt_parser = OptionParser.new do |opts|
18-
opts.banner = <<-EOT.unindent
18+
opts.banner = <<-EOT.strip_heredoc
1919
Usage: Command line helpers for Remi.
2020
EOT
2121

lib/remi/core/daru.rb

Lines changed: 0 additions & 28 deletions
This file was deleted.

lib/remi/core/string.rb

Lines changed: 0 additions & 8 deletions
This file was deleted.

lib/remi/cucumber/business_rules.rb

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
module Remi::BusinessRules
2-
using Remi::Core::Refinements
2+
using Remi::Refinements::Symbolizer
33

44
def self.parse_full_field(full_field_name)
55
full_field_name.split(':').map(&:strip)
@@ -230,6 +230,15 @@ def size
230230
@data_obj.df.size
231231
end
232232

233+
# Public: Builds a column-oriented hash from the data subject's dataframe.
#
# Each key is a column name passed through `symbolize`; each value is an
# Array holding that column's entries (the result of `to_a` on the value
# returned by `df.to_hash`).
#
# Returns a Hash of symbolized column name => Array of values.
def column_hash
  @data_obj.df.to_hash.each_with_object({}) do |(column, values), result|
    result[column.symbolize] = values.to_a
  end
end
241+
233242
# For debugging only
234243
def _df
235244
@data_obj.df
@@ -429,6 +438,17 @@ def to_df(seed_hash, field_symbolizer:)
429438
end
430439
df
431440
end
441+
442+
# Public: Builds a column-oriented hash from the wrapped Cucumber table.
#
# Walks every row hash returned by `@table.hashes` and appends each cell to
# the Array stored under the symbolized column name, so the values of a
# column appear in row order.
#
# Returns a Hash of symbolized column name => Array of cell values. A table
# with no rows yields an empty Hash, because columns are only discovered
# while iterating over rows.
def column_hash
  @table.hashes.each_with_object({}) do |row, columns|
    row.each do |header, cell|
      columns[header.symbolize] ||= []
      columns[header.symbolize] << cell
    end
  end
end
432452
end
433453

434454

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
Feature: Tests the aggregate refinement to the Daru library
2+
3+
Background:
4+
Given the job is 'Aggregate'
5+
And the job source 'Source Data'
6+
And the job target 'Target Data'
7+
8+
And the source 'Source Data'
9+
And the target 'Target Data'
10+
11+
Scenario: The aggregator should find the minimum year for each 'Alpha'
12+
Given the following example record for 'Source Data':
13+
| Alpha | Year | something |
14+
| a | 2016 | 1 |
15+
| a | 2018 | 1 |
16+
| b | 2016 | 2 |
17+
| b | 2010 | 3 |
18+
| a | 2017 | 4 |
19+
And the following example record called 'expected result':
20+
| Alpha | Year |
21+
| a | 2016 |
22+
| b | 2010 |
23+
Then the target should match the example 'expected result'

lib/remi/project/features/step_definitions/remi_step.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,22 @@
201201
step "the target field '#{target_field}' is set to the value \"*#{date_reference}*\""
202202
end
203203

204+
# Step: asserts that the named target matches the named example on every
# column the two have in common.
#
# target_name  - key used to look up the target in `@brt.targets`.
# example_name - key used to look up the example in `@brt.examples`.
#
# Both sides are converted with `column_hash` and compared only on the keys
# present in both, so extra columns on either side are ignored.
Then /^the target '(.+)' should match the example '([[:alnum:]\s]+)'$/ do |target_name, example_name|
  @brt.run_transforms

  target_hash = @brt.targets[target_name].column_hash
  example_hash = @brt.examples[example_name].column_hash
  common_keys = target_hash.keys & example_hash.keys

  # With no shared column both filtered hashes below would be empty and the
  # comparison would pass without verifying anything, so fail loudly instead.
  expect(common_keys).not_to(
    be_empty,
    "target '#{target_name}' and example '#{example_name}' have no columns in common"
  )

  expect(target_hash.select { |k, _| common_keys.include?(k) })
    .to eq(example_hash.select { |k, _| common_keys.include?(k) })
end
214+
215+
# Step: shorthand for the named-target variant of this step. It picks the
# first key of `@brt.targets` as the target name and delegates to the
# "the target '<name>' should match the example '<example>'" step.
Then /^the target should match the example '([[:alnum:]\s]+)'$/ do |example_name|
  step "the target '#{@brt.targets.keys.first}' should match the example '#{example_name}'"
end
219+
204220

205221
### Transforms
206222

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
require_relative '../../jobs/sample_job'
22
require_relative '../../jobs/copy_source_job'
33
require_relative '../../jobs/transforms/transform_jobs'
4+
require_relative '../../jobs/aggregate_job'
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
require_relative 'all_jobs_shared'
2+
3+
# Sample job exercising the `aggregate` refinement on dataframes: it groups
# the source dataframe by :alpha and keeps the minimum :year of each group.
class AggregateJob
  include AllJobsShared
  using Remi::Refinements::Daru

  define_source :source_data, Remi::DataSource::DataFrame
  define_target :target_data, Remi::DataTarget::DataFrame

  define_transform :main, sources: :source_data, targets: :target_data do
    # Minimum of `field` over the rows of `df` located at `row_indices`.
    # NOTE(review): presumably `aggregate` calls the function with the
    # dataframe and the row indices of one group - confirm against the
    # refinement, which is not visible here.
    min_of_field = lambda do |field, df, row_indices|
      row_indices.map { |idx| df.row[idx][field] }.min
    end

    # Currying fixes the field to :year, leaving a function that still
    # expects the dataframe and the row indices.
    min_year = min_of_field.curry[:year]

    target_data.df = source_data.df.aggregate(by: :alpha, func: min_year).detach_index
    # Name the two resulting columns explicitly.
    target_data.df.vectors = Daru::Index.new([:alpha, :year])
  end
end

0 commit comments

Comments
 (0)