From a79211414e540b103bebb3e8bb78e10ccbb8d6a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Sta=CC=88mpfli?= Date: Thu, 27 Sep 2012 15:57:53 +0200 Subject: [PATCH 1/6] refactoring: lookups are independent from definitions --- lib/data-import.rb | 1 + lib/data-import/definition.rb | 44 ++++++- .../definition/id_mapping_container.rb | 42 ++++++ lib/data-import/definition/lookup.rb | 91 ------------- lib/data-import/definition/mappings.rb | 2 +- lib/data-import/definition/script.rb | 2 - lib/data-import/definition/simple.rb | 4 +- lib/data-import/dependency_resolver.rb | 2 +- lib/data-import/dictionary.rb | 25 ++++ lib/data-import/dsl.rb | 8 +- lib/data-import/dsl/import.rb | 9 +- lib/data-import/dsl/lookup.rb | 16 +++ lib/data-import/dsl/script.rb | 5 +- lib/data-import/errors.rb | 12 ++ lib/data-import/execution_context.rb | 6 +- lib/data-import/execution_plan.rb | 5 +- .../acceptance/legacy_simple_mappings_spec.rb | 27 ++++ spec/acceptance/lookup_tables_spec.rb | 13 +- .../definition/id_mapping_container_spec.rb | 35 +++++ .../data-import/definition/lookup_spec.rb | 122 ------------------ .../data-import/definition/mappings_spec.rb | 12 +- .../data-import/definition/script_spec.rb | 2 +- .../definition/simple/importer_spec.rb | 8 +- .../data-import/definition/simple_spec.rb | 2 +- spec/unit/data-import/definition_spec.rb | 2 +- .../data-import/dependency_resolver_spec.rb | 5 +- spec/unit/data-import/dictionary_spec.rb | 22 ++++ spec/unit/data-import/dsl/import_spec.rb | 30 ++++- spec/unit/data-import/dsl/script_spec.rb | 29 ++++- spec/unit/data-import/dsl_spec.rb | 26 ++-- spec/unit/data-import/execution_plan_spec.rb | 6 + spec/unit/data-import/runner_spec.rb | 6 +- 32 files changed, 352 insertions(+), 269 deletions(-) create mode 100644 lib/data-import/definition/id_mapping_container.rb delete mode 100644 lib/data-import/definition/lookup.rb create mode 100644 lib/data-import/dictionary.rb create mode 100644 lib/data-import/dsl/lookup.rb create mode 100644 spec/unit/data-import/definition/id_mapping_container_spec.rb delete mode 100644 spec/unit/data-import/definition/lookup_spec.rb create mode 100644 spec/unit/data-import/dictionary_spec.rb diff --git a/lib/data-import.rb b/lib/data-import.rb index 77ba735..19d58e1 100644 --- a/lib/data-import.rb +++ b/lib/data-import.rb @@ -5,6 +5,7 @@ require "data-import/version" require "data-import/errors" +require 'data-import/dictionary' require 'data-import/dependency_resolver' require 'data-import/execution_context' require 'data-import/runner' diff --git a/lib/data-import/definition.rb b/lib/data-import/definition.rb index 7c85f56..e24bc35 100644 --- a/lib/data-import/definition.rb +++ b/lib/data-import/definition.rb @@ -1,4 +1,4 @@ -require 'data-import/definition/lookup' +require 'data-import/definition/id_mapping_container' require 'data-import/definition/mappings' require 'data-import/definition/simple' require 'data-import/definition/simple/importer' @@ -10,13 +10,53 @@ class Definition attr_reader :source_database, :target_database attr_reader :dependencies - def initialize(name, source_database, target_database) + def initialize(name, source_database, target_database, id_mapping_container) @name = name @source_database = source_database @target_database = target_database @dependencies = [] + @id_mapping_container = id_mapping_container end + def lookup_for(mapping_name, options = {}) + warn "[DEPRECATION] `definition(...).lookup_for(...)` is deprecated and will be removed in later versions! Use `lookup_for(...)` in import and script blocks instead instead.\n#{caller[0]}" + + attribute = options.fetch(:column) { mapping_name } + + if @id_mapping_container.has_dictionary_for?(name, mapping_name) + raise ArgumentError, "lookup-table for column '#{attribute}' was already defined" + else + d = if options.fetch(:ignore_case) { false } + CaseIgnoringDictionary.new + else + Dictionary.new + end + @id_mapping_container.add_dictionary(name, mapping_name, attribute, d) + end + end + + def row_imported(id, row) + warn "[DEPRECATION] `definition(...).row_imported(...)` is deprecated and will be removed in later versions! Use `id_mapping_for(...).add(...)` instead.\n#{caller[0]}" + + @id_mapping_container.update_dictionaries(name, id, row) + end + + def identify_by(mapping_name, value) + warn "[DEPRECATION] `definition(...).identify_by(...)` is deprecated and will be removed in later versions! Use `id_mapping_for(...).lookup(...)` instead.\n#{caller[0]}" + + return if value.blank? + if @id_mapping_container.has_dictionary_for?(name, mapping_name) + @id_mapping_container.fetch(name, mapping_name).lookup(value) + else + raise ArgumentError, "no lookup-table defined named '#{name}'" + end + end + + def has_lookup_table_on?(name) + @lookup_tables.has_key? name + end + private :has_lookup_table_on? + def add_dependency(dependency) @dependencies << dependency end diff --git a/lib/data-import/definition/id_mapping_container.rb b/lib/data-import/definition/id_mapping_container.rb new file mode 100644 index 0000000..7049181 --- /dev/null +++ b/lib/data-import/definition/id_mapping_container.rb @@ -0,0 +1,42 @@ +module DataImport + class Definition + class IdMappingContainer + def initialize + @mapping_configs = Hash.new {|hash, key| hash[key] = []} + end + + def add_dictionary(definition_name, mapping_name, attribute, dictionary) + @mapping_configs[definition_name] << IdMappingConfig.new(mapping_name, attribute, dictionary) + end + + def fetch(definition_name, mapping_name) + if has_dictionary_for?(definition_name, mapping_name) + @mapping_configs[definition_name].detect {|config| config.name == mapping_name}.dictionary + else + raise MissingIdMappingError.new(mapping_name) + end + end + + def has_dictionary_for?(definition_name, mapping_name) + @mapping_configs[definition_name].any? {|config| config.name == mapping_name} + end + + def update_dictionaries(definition_name, new_id, row) + @mapping_configs[definition_name].each do |config| + next if row[config.attribute].blank? + config.dictionary.add(row[config.attribute], new_id) + end + end + + class IdMappingConfig + attr_reader :name, :attribute, :dictionary + + def initialize(name, attribute, dictionary) + @name = name + @attribute = attribute.to_sym + @dictionary = dictionary + end + end + end + end +end diff --git a/lib/data-import/definition/lookup.rb b/lib/data-import/definition/lookup.rb deleted file mode 100644 index 161b71a..0000000 --- a/lib/data-import/definition/lookup.rb +++ /dev/null @@ -1,91 +0,0 @@ -module DataImport - class Definition - module Lookup - def initialize(*args) - @lookup_tables = {} - super - end - - def lookup_for(name, options = {}) - attribute = options.fetch(:column) { name } - - if has_lookup_table_on?(attribute) - raise ArgumentError, "lookup-table for column '#{attribute}' was already defined" - else - @lookup_tables[name] = if options.fetch(:ignore_case) { false } - CaseIgnoringTable.new(attribute) - else - Table.new(attribute) - end - end - end - - def row_imported(id, row) - row.each do |attribute, value| - next if value.blank? - add_lookup_value(attribute, value, id) - end - end - - def identify_by(name, value) - return if value.blank? - if has_lookup_table_named?(name) - @lookup_tables[name].lookup(value) - else - raise ArgumentError, "no lookup-table defined named '#{name}'" - end - end - - def has_lookup_table_on?(attribute) - @lookup_tables.values.any? { |t| t.for?(attribute) } - end - - def has_lookup_table_named?(name) - @lookup_tables.has_key?(name.to_sym) - end - - def add_lookup_value(attribute, value, id) - @lookup_tables.each do |_name, table| - table.process(attribute, value, id) - end - end - private :add_lookup_value - - class Table - def initialize(attribute) - @attribute = attribute.to_sym - @mappings = {} - end - - def for?(attribute) - @attribute == attribute.to_sym - end - - def process(attribute, key, id) - if for?(attribute) - add(key, id) - end - end - - def add(key, id) - @mappings[key] = id - end - - def lookup(key) - @mappings[key] - end - end - - class CaseIgnoringTable < Table - def add(key, id) - super(key.downcase, id) - end - - def lookup(key) - super(key.downcase) - end - end - - end - end -end diff --git a/lib/data-import/definition/mappings.rb b/lib/data-import/definition/mappings.rb index 2950280..ed2b23b 100644 --- a/lib/data-import/definition/mappings.rb +++ b/lib/data-import/definition/mappings.rb @@ -49,7 +49,7 @@ def initialize(referenced_definition, old_foreign_key, new_foreign_key, lookup_n end def apply!(definition, context, row, output_row) - output_row.merge!(@new_foreign_key => context.definition(@referenced_definition).identify_by(@lookup_name, row[@old_foreign_key])) + output_row.merge!(@new_foreign_key => context.id_mapping_for(@referenced_definition, @lookup_name).lookup(row[@old_foreign_key])) end end diff --git a/lib/data-import/definition/script.rb b/lib/data-import/definition/script.rb index 8c26bbe..2189b29 100644 --- a/lib/data-import/definition/script.rb +++ b/lib/data-import/definition/script.rb @@ -3,8 +3,6 @@ class Definition class Script < Definition attr_accessor :body - include Lookup - def run(context) target_database.transaction do context.instance_exec &body diff --git a/lib/data-import/definition/simple.rb b/lib/data-import/definition/simple.rb index 46cd1fa..aec12b5 100644 --- a/lib/data-import/definition/simple.rb +++ b/lib/data-import/definition/simple.rb @@ -2,14 +2,12 @@ module DataImport class Definition class Simple < Definition - include Lookup - attr_accessor :target_table_name attr_accessor :reader, :writer attr_accessor :after_blocks, :after_row_blocks attr_accessor :row_validation_blocks - def initialize(name, source_database, target_database) + def initialize(name, source_database, target_database, id_mapping_container) super @mappings = [] @mode = :insert diff --git a/lib/data-import/dependency_resolver.rb b/lib/data-import/dependency_resolver.rb index b68b2dc..c83008a 100644 --- a/lib/data-import/dependency_resolver.rb +++ b/lib/data-import/dependency_resolver.rb @@ -10,7 +10,7 @@ def resolve(options = {}) resolved_dependencies = @strategy.new(dependency_graph).call(options) resolved_dependencies = resolved_dependencies.map {|definition| @plan.definition(definition) } - ExecutionPlan.new(resolved_dependencies) + ExecutionPlan.new(resolved_dependencies, @plan.id_mapping_container) end def dependency_graph diff --git a/lib/data-import/dictionary.rb b/lib/data-import/dictionary.rb new file mode 100644 index 0000000..56ce9f1 --- /dev/null +++ b/lib/data-import/dictionary.rb @@ -0,0 +1,25 @@ +module DataImport + class Dictionary + def initialize + @mappings = {} + end + + def add(key, value) + @mappings[key] = value + end + + def lookup(key) + @mappings[key] + end + end + + class CaseIgnoringDictionary < Dictionary + def add(key, value) + super(key.nil? ? nil : key.downcase, value) + end + + def lookup(key) + super(key.nil? ? nil : key.downcase) + end + end +end diff --git a/lib/data-import/dsl.rb b/lib/data-import/dsl.rb index 4f82ab7..ea6e09e 100644 --- a/lib/data-import/dsl.rb +++ b/lib/data-import/dsl.rb @@ -49,17 +49,17 @@ def target(*args) end def import(name, &block) - definition = DataImport::Definition::Simple.new(name, source_database, target_database) + definition = DataImport::Definition::Simple.new(name, source_database, target_database, @plan.id_mapping_container) @plan.add_definition(definition) - Import.new(definition).instance_eval &block + Import.new(definition, @plan.id_mapping_container).instance_eval &block end def script(name, &block) - definition = DataImport::Definition::Script.new(name, source_database, target_database) + definition = DataImport::Definition::Script.new(name, source_database, target_database, @plan.id_mapping_container) @plan.add_definition(definition) - Script.new(definition).instance_eval &block + Script.new(definition, @plan.id_mapping_container).instance_eval &block end def before_filter(&block) diff --git a/lib/data-import/dsl/import.rb b/lib/data-import/dsl/import.rb index d0140e6..8e1ee3c 100644 --- a/lib/data-import/dsl/import.rb +++ b/lib/data-import/dsl/import.rb @@ -1,14 +1,17 @@ require 'data-import/dsl/dependencies' +require 'data-import/dsl/lookup' module DataImport class Dsl class Import include Dependencies + include Lookup attr_reader :definition - def initialize(definition) + def initialize(definition, id_mapping_container) @definition = definition + @id_mapping_container = id_mapping_container end def from(table_name = nil, options = {}, &block) @@ -82,10 +85,6 @@ def check_block_arity(block) end private :check_block_arity - def lookup_for(*attributes) - definition.lookup_for(*attributes) - end - end end end diff --git a/lib/data-import/dsl/lookup.rb b/lib/data-import/dsl/lookup.rb new file mode 100644 index 0000000..c74c50d --- /dev/null +++ b/lib/data-import/dsl/lookup.rb @@ -0,0 +1,16 @@ +module DataImport + class Dsl + module Lookup + def lookup_for(name, options = {}) + attribute = options.fetch(:column) { name } + dictionary = if options.fetch(:ignore_case) { false } + CaseIgnoringDictionary.new + else + Dictionary.new + end + + @id_mapping_container.add_dictionary(@definition.name, name, attribute, dictionary) + end + end + end +end diff --git a/lib/data-import/dsl/script.rb b/lib/data-import/dsl/script.rb index 36b3476..0aa4048 100644 --- a/lib/data-import/dsl/script.rb +++ b/lib/data-import/dsl/script.rb @@ -1,14 +1,17 @@ require 'data-import/dsl/dependencies' +require 'data-import/dsl/lookup' module DataImport class Dsl class Script include Dependencies + include Lookup attr_reader :definition - def initialize(definition) + def initialize(definition, id_mapping_container) @definition = definition + @id_mapping_container = id_mapping_container end def body(&block) diff --git a/lib/data-import/errors.rb b/lib/data-import/errors.rb index 587ece6..64c1fe9 100644 --- a/lib/data-import/errors.rb +++ b/lib/data-import/errors.rb @@ -11,6 +11,18 @@ def initialize(definition) end end + class MissingIdMappingError < RuntimeError + MESSAGE = <<-ERROR + no id mapping found for '%s'. possible reasons are: + - you did not define '%s' + - you executed a partial migration and forgot to add '%s' as a dependency + ERROR + + def initialize(name) + super(MESSAGE % [name, name, name]) + end + end + class CircularDependencyError < RuntimeError MESSAGE = <<-ERROR circular dependencies for: '%s' <-> '%s'. possible reasons are: diff --git a/lib/data-import/execution_context.rb b/lib/data-import/execution_context.rb index 4a4e315..47e95eb 100644 --- a/lib/data-import/execution_context.rb +++ b/lib/data-import/execution_context.rb @@ -28,12 +28,16 @@ def target_database @definition.target_database end + def id_mapping_for(definition_name, mapping_name) + @execution_plan.id_mapping_container.fetch(definition_name, mapping_name) + end + class Proxy def initialize(context) @context = context end - [:logger, :definition, :name, :source_database, :target_database].each do |method_symbol| + [:logger, :definition, :name, :source_database, :target_database, :id_mapping_for].each do |method_symbol| define_method method_symbol do |*args| @context.send(method_symbol, *args) end diff --git a/lib/data-import/execution_plan.rb b/lib/data-import/execution_plan.rb index 6cca2bb..5289e11 100644 --- a/lib/data-import/execution_plan.rb +++ b/lib/data-import/execution_plan.rb @@ -1,9 +1,12 @@ module DataImport class ExecutionPlan - def initialize(definitions = []) + attr_reader :id_mapping_container + + def initialize(definitions = [], id_mapping_container = nil) @definitions = Hash[definitions.map do |definition| [definition.name, definition] end] + @id_mapping_container = id_mapping_container || Definition::IdMappingContainer.new end def add_definition(definition) diff --git a/spec/acceptance/legacy_simple_mappings_spec.rb b/spec/acceptance/legacy_simple_mappings_spec.rb index d6838d4..060f9bb 100644 --- a/spec/acceptance/legacy_simple_mappings_spec.rb +++ b/spec/acceptance/legacy_simple_mappings_spec.rb @@ -7,6 +7,8 @@ from 'tblAnimal', :primary_key => 'sAnimalID' to 'animals' + lookup_for :sAnimalID + mapping 'sAnimalID' => :id mapping 'strAnimalTitleText' => :name mapping 'sAnimalAge' => 'age' @@ -52,6 +54,18 @@ target_database.db[:danger_ratings].insert(:id => 3, :description => 'high') end end + + import 'Food' do + dependencies 'Animals' + + from 'tblFood' + to 'food' + + mapping 'Name' => :name + mapping 'sAnimalID' do |context, sAnimalID| + {:animal_id => context.definition('Animals').identify_by(:sAnimalID, sAnimalID)} + end + end end database_setup do @@ -65,6 +79,11 @@ Date :dteDied end + source.create_table :tblFood do + String :Name + Integer :sAnimalID + end + target.create_table :animals do primary_key :id String :name @@ -75,6 +94,11 @@ String :danger_note end + target.create_table :food do + Integer :animal_id + String :name + end + target.create_table :animal_logs do primary_key :id Date :occured_at @@ -105,6 +129,9 @@ :sAnimalAge => 5, :strThreat => 'none', :dteBorn => Date.new(1998, 11, 9)) + + source[:tblFood].insert(:Name => 'Flesh', :sAnimalID => 1) + source[:tblFood].insert(:Name => 'Grass', :sAnimalID => 2) end it 'mapps columns to the new schema' do diff --git a/spec/acceptance/lookup_tables_spec.rb b/spec/acceptance/lookup_tables_spec.rb index 0c3077d..ab3dd8a 100644 --- a/spec/acceptance/lookup_tables_spec.rb +++ b/spec/acceptance/lookup_tables_spec.rb @@ -16,11 +16,12 @@ script 'Mark ruby article' do dependencies 'Articles' + lookup_for :sArticleId + body do - new_id = target_database.db[:articles].insert(:slug => definition('Articles').identify_by(:reference, 'ruby-is-awesome')) + new_id = target_database.db[:articles].insert(:slug => id_mapping_for('Articles', :reference).lookup('ruby-is-awesome')) - definition('Mark ruby article').lookup_for(:sArticleId) - definition('Mark ruby article').row_imported(new_id, {:sArticleId => 0}) + id_mapping_for('Mark ruby article', :sArticleId).add(0, new_id) end end @@ -28,7 +29,7 @@ dependencies 'Mark ruby article' body do - target_database.db[:posts].insert(:id => 11, :article_id => definition('Mark ruby article').identify_by(:sArticleId, 0)) + target_database.db[:posts].insert(:id => 11, :article_id => id_mapping_for('Mark ruby article', :sArticleId).lookup(0)) end end @@ -39,10 +40,10 @@ mapping 'sPostId' => :id mapping 'sArticleId' do - { :article_id => definition('Articles').identify_by(:sArticleId, row[:sArticleId]) } + { :article_id => id_mapping_for('Articles', :sArticleId).lookup(row[:sArticleId]) } end mapping 'strArticleRef' do - { :similar_article_id => definition('Articles').identify_by(:reference, row[:strArticleRef]) } + { :similar_article_id => id_mapping_for('Articles', :reference).lookup(row[:strArticleRef]) } end end diff --git a/spec/unit/data-import/definition/id_mapping_container_spec.rb b/spec/unit/data-import/definition/id_mapping_container_spec.rb new file mode 100644 index 0000000..c07b6cd --- /dev/null +++ b/spec/unit/data-import/definition/id_mapping_container_spec.rb @@ -0,0 +1,35 @@ +require 'unit/spec_helper' + +describe DataImport::Definition::IdMappingContainer do + it 'raises an error if a dictionary cannot be found' do + lambda do + subject.fetch('Animals', 'gender') + end.should raise_error(DataImport::MissingIdMappingError) + end + + context 'with dictionaries' do + let(:name_dictionary) { stub } + let(:tag_dictionary) { stub } + + before do + subject.add_dictionary('Animals', 'name', :name, name_dictionary) + subject.add_dictionary('Animals', 'tagged animal', :tag, tag_dictionary) + end + + it 'fetches an id dictionary' do + subject.fetch('Animals', 'name').should == name_dictionary + end + + it 'determines if a dictionary exists' do + subject.has_dictionary_for?('Animals', 'name').should == true + subject.has_dictionary_for?('People', 'name').should == false + end + + it 'updates the id dictionaries when a row has benn imported' do + name_dictionary.should_receive(:add).with('Lion', 5) + tag_dictionary.should_not_receive(:add) + + subject.update_dictionaries('Animals', 5, {:name => 'Lion', :continent => 'Africa'}) + end + end +end diff --git a/spec/unit/data-import/definition/lookup_spec.rb b/spec/unit/data-import/definition/lookup_spec.rb deleted file mode 100644 index 115e565..0000000 --- a/spec/unit/data-import/definition/lookup_spec.rb +++ /dev/null @@ -1,122 +0,0 @@ -require 'unit/spec_helper' - -describe DataImport::Definition::Lookup do - - let(:example_class) do - Class.new do - include DataImport::Definition::Lookup - end - end - - subject { example_class.new } - - describe "lookup-table definition" do - it 'knows what attributes have a lookup-table' do - subject.lookup_for :code - - subject.has_lookup_table_on?(:code).should be_true - end - - it 'knows what attributes do not have a lookup-table' do - subject.lookup_for :code - - subject.has_lookup_table_on?(:oldID).should be_false - subject.has_lookup_table_on?(:strRef).should be_false - subject.has_lookup_table_on?(:abcd).should be_false - end - - it 'allows to define lookup-tables with multiple calls' do - subject.lookup_for :code - subject.lookup_for :strRef - - subject.has_lookup_table_on?(:code).should be_true - subject.has_lookup_table_on?(:strRef).should be_true - end - - it 'works with strings' do - subject.lookup_for 'a_string' - - subject.has_lookup_table_on?(:a_string).should be_true - end - - it 'works with symbols' do - subject.lookup_for :a_symbol - - subject.has_lookup_table_on?('a_symbol').should be_true - end - - it 'should not allow to define two lookup-tables with the same name' do - subject.lookup_for :code - lambda do - subject.lookup_for :code - end.should raise_error(ArgumentError, "lookup-table for column 'code' was already defined") - end - - it 'should not allow to define two lookup-tables for the same column' do - subject.lookup_for :code - lambda do - subject.lookup_for :same_code, :column => :code - end.should raise_error(ArgumentError, "lookup-table for column 'code' was already defined") - end - end - - describe 'mappings and lookups' do - before { subject.lookup_for :code } - - it 'does not add any mappings when no lookup-attributes are given' do - lambda do - subject.row_imported(66, :undefined_attribute => 'value-to-lookup') - end.should_not raise_error - end - - it 'stores added mappings' do - id = 17 - lookup_value = 'value-to-lookup' - - subject.row_imported(id, :code => lookup_value) - - subject.identify_by(:code, lookup_value).should == id - end - - it 'allows to specify a column name different form the lookup name' do - id = 9 - ref = 'i-am-a-reference' - subject.lookup_for :reference, :column => 'strRef' - - subject.row_imported(id, :strRef => ref) - - subject.identify_by(:reference, ref).should == id - end - - it 'raises an exception when trying accessing an undefined lookup-table' do - lambda do - subject.identify_by(:undefined_lookup_table, 'this-wont-work') - end.should raise_error(ArgumentError, "no lookup-table defined named 'undefined_lookup_table'") - end - - it 'do not add nil value mappings' do - do_not_map_this_id = 6 - subject.row_imported(do_not_map_this_id, :code => nil) - - subject.identify_by(:code, nil).should == nil - end - end - - context 'Case Ignoring LookupTable' do - before do - subject.lookup_for :reference, :ignore_case => true - end - - it 'ignores case if specified' do - id = 9 - subject.row_imported(id, :reference => 'i-AM-a-REF') - - subject.identify_by(:reference, 'i-am-a-reF').should == id - end - - it 'works with nil values' do - id = 9 - subject.identify_by(:reference, nil).should == nil - end - end -end diff --git a/spec/unit/data-import/definition/mappings_spec.rb b/spec/unit/data-import/definition/mappings_spec.rb index 2cab4d8..3cb060a 100644 --- a/spec/unit/data-import/definition/mappings_spec.rb +++ b/spec/unit/data-import/definition/mappings_spec.rb @@ -108,9 +108,9 @@ it 'sets the foreign key to the newly generated primary key' do row = {:sLegacyAddressId => 28} - address_definition = stub - context.should_receive(:definition).with('OldAddress').and_return(address_definition) - address_definition.should_receive(:identify_by).with(:reference, 28).and_return(4) + id_mapping = stub + context.should_receive(:id_mapping_for).with('OldAddress', :reference).and_return(id_mapping) + id_mapping.should_receive(:lookup).with(28).and_return(4) subject.apply!(definition, context, row, output_row) output_row.should == {:address_id => 4} @@ -123,9 +123,9 @@ it 'uses :id to look up the newly generated id' do row = {:sLegacyAddressId => 28} - address_definition = stub - context.should_receive(:definition).with('OldAddress').and_return(address_definition) - address_definition.should_receive(:identify_by).with(:id, 28) + id_mapping = stub + context.should_receive(:id_mapping_for).with('OldAddress', :id).and_return(id_mapping) + id_mapping.should_receive(:lookup).with(28) subject.apply!(definition, context, row, output_row) end diff --git a/spec/unit/data-import/definition/script_spec.rb b/spec/unit/data-import/definition/script_spec.rb index bc36a68..46b5f37 100644 --- a/spec/unit/data-import/definition/script_spec.rb +++ b/spec/unit/data-import/definition/script_spec.rb @@ -3,7 +3,7 @@ describe DataImport::Definition::Script do let(:source) { stub } let(:target) { mock } - subject { described_class.new('a', source, target) } + subject { described_class.new('a', source, target, nil) } describe '#run' do let(:context) { stub(:name => 'ABC') } diff --git a/spec/unit/data-import/definition/simple/importer_spec.rb b/spec/unit/data-import/definition/simple/importer_spec.rb index 66092c4..d4d4663 100644 --- a/spec/unit/data-import/definition/simple/importer_spec.rb +++ b/spec/unit/data-import/definition/simple/importer_spec.rb @@ -4,8 +4,9 @@ let(:source) { stub } let(:target) { stub } - let(:other_definition) { DataImport::Definition::Simple.new 'C', source, target } - let(:definition) { DataImport::Definition::Simple.new 'A', source, target } + let(:other_definition) { DataImport::Definition::Simple.new 'C', source, target, nil } + let(:dictionary) { stub } + let(:definition) { DataImport::Definition::Simple.new 'A', source, target, dictionary } let(:progress_reporter) { mock('ProgressReporter') } let(:context) { mock('Context', :name => 'A', :progress_reporter => progress_reporter) } before { context.stub(:definition).with('C').and_return(other_definition) } @@ -80,6 +81,7 @@ subject.should_receive(:map_row).with(instance_of(DataImport::Definition::Simple::Context), {:id => 1}).and_return({:new_id => 1}) writer.should_receive(:write_row).any_number_of_times + dictionary.should_receive(:update_dictionaries) subject.import_row(:id => 1) validated_mapped_rows.should == [{:new_id => 1}] @@ -110,6 +112,8 @@ subject.should_receive(:map_row).with(instance_of(DataImport::Definition::Simple::Context), {:id => 1}).and_return({:new_id => 1}) subject.should_receive(:map_row).with(instance_of(DataImport::Definition::Simple::Context), {:id => 2}).and_return({:new_id => 2}) writer.should_receive(:write_row).any_number_of_times + dictionary.should_receive(:update_dictionaries).twice + subject.import_row(:id => 1) subject.import_row(:id => 2) diff --git a/spec/unit/data-import/definition/simple_spec.rb b/spec/unit/data-import/definition/simple_spec.rb index 3b0456a..ad31774 100644 --- a/spec/unit/data-import/definition/simple_spec.rb +++ b/spec/unit/data-import/definition/simple_spec.rb @@ -4,7 +4,7 @@ let(:source) { stub } let(:target) { stub } - subject { DataImport::Definition::Simple.new('a', source, target) } + subject { DataImport::Definition::Simple.new('a', source, target, nil) } describe "#mappings" do it "returns an empty hash by default" do diff --git a/spec/unit/data-import/definition_spec.rb b/spec/unit/data-import/definition_spec.rb index 23181f0..75cfa5a 100644 --- a/spec/unit/data-import/definition_spec.rb +++ b/spec/unit/data-import/definition_spec.rb @@ -2,7 +2,7 @@ describe DataImport::Definition do - subject { DataImport::Definition.new('a', :source, :target) } + subject { DataImport::Definition.new('a', :source, :target, nil) } it 'executes in 1 step' do subject.total_steps_required.should == 1 diff --git a/spec/unit/data-import/dependency_resolver_spec.rb b/spec/unit/data-import/dependency_resolver_spec.rb index 4b25a0e..9bafda7 100644 --- a/spec/unit/data-import/dependency_resolver_spec.rb +++ b/spec/unit/data-import/dependency_resolver_spec.rb @@ -7,7 +7,8 @@ let(:a) { stub('A', :name => 'A', :dependencies => []) } let(:b) { stub('B', :name => 'B', :dependencies => ['A']) } let(:c) { stub('C', :name => 'C', :dependencies => ['A', 'B']) } - let(:plan) { DataImport::ExecutionPlan.new([a, b, c]) } + let(:container) { stub } + let(:plan) { DataImport::ExecutionPlan.new([a, b, c], container) } subject { DataImport::DependencyResolver.new(plan, strategy) } let(:graph) { @@ -28,7 +29,7 @@ plan resolved_plan = mock - DataImport::ExecutionPlan.should_receive(:new).with([a, b]).and_return(resolved_plan) + DataImport::ExecutionPlan.should_receive(:new).with([a, b], container).and_return(resolved_plan) subject.resolve.should == resolved_plan end diff --git a/spec/unit/data-import/dictionary_spec.rb b/spec/unit/data-import/dictionary_spec.rb new file mode 100644 index 0000000..2b2882f --- /dev/null +++ b/spec/unit/data-import/dictionary_spec.rb @@ -0,0 +1,22 @@ +require 'unit/spec_helper' + +describe DataImport::Dictionary do + subject { described_class.new } + it 'fetches an id by its key' do + subject.add('leo', 'lion') + subject.lookup('leo').should == 'lion' + end +end + +describe DataImport::CaseIgnoringDictionary do + subject { described_class.new } + it 'fetches an id by its key ignoring case' do + subject.add('leo', 'lion') + subject.lookup('Leo').should == 'lion' + end + + it 'handles nil values' do + subject.add(nil, 0) + subject.lookup(nil).should == 0 + end +end diff --git a/spec/unit/data-import/dsl/import_spec.rb b/spec/unit/data-import/dsl/import_spec.rb index 963fb07..3c52b79 100644 --- a/spec/unit/data-import/dsl/import_spec.rb +++ b/spec/unit/data-import/dsl/import_spec.rb @@ -5,8 +5,9 @@ let(:source) { stub(:adapter_scheme => 'sqlite') } let(:target) { stub } - let(:definition) { DataImport::Definition::Simple.new('d', source, target) } - subject { DataImport::Dsl::Import.new(definition) } + let(:container) { stub } + let(:definition) { DataImport::Definition::Simple.new('d', source, target, container) } + subject { DataImport::Dsl::Import.new(definition, container) } describe "#from" do context 'when a table-name is passed' do @@ -165,4 +166,29 @@ definition.row_validation_blocks.should == [validation_block] end + describe 'lookups' do + let(:dictionary) { stub } + + it 'defines an id mapping' do + DataImport::Dictionary.should_receive(:new).and_return(dictionary) + container.should_receive(:add_dictionary).with('d', :name, :strName, dictionary) + + subject.lookup_for :name, :column => :strName + end + + it 'defines an id mapping without explicitly setting the column' do + DataImport::Dictionary.should_receive(:new).and_return(dictionary) + container.should_receive(:add_dictionary).with('d', :name, :name, dictionary) + + subject.lookup_for :name + end + + it 'defines an case insensitive id mapping' do + DataImport::CaseIgnoringDictionary.should_receive(:new).and_return(dictionary) + container.should_receive(:add_dictionary).with('d', :name, :name, dictionary) + + subject.lookup_for :name, :ignore_case => true + end + end + end diff --git a/spec/unit/data-import/dsl/script_spec.rb b/spec/unit/data-import/dsl/script_spec.rb index 7263bb6..3906a6c 100644 --- a/spec/unit/data-import/dsl/script_spec.rb +++ b/spec/unit/data-import/dsl/script_spec.rb @@ -4,8 +4,9 @@ let(:source) { stub } let(:target) { stub } - let(:definition) { DataImport::Definition::Script.new('s', source, target) } - subject { described_class.new(definition) } + let(:container) { stub } + let(:definition) { DataImport::Definition::Script.new('s', source, target, container) } + subject { described_class.new(definition, container) } describe "#dependencies" do it "sets the list of definitions it depends on" do @@ -30,4 +31,28 @@ end end + describe 'lookups' do + let(:dictionary) { stub } + + it 'defines an id mapping' do + DataImport::Dictionary.should_receive(:new).and_return(dictionary) + container.should_receive(:add_dictionary).with('s', :name, :strName, dictionary) + + subject.lookup_for :name, :column => :strName + end + + it 'defines an id mapping without explicitly setting the column' do + DataImport::Dictionary.should_receive(:new).and_return(dictionary) + container.should_receive(:add_dictionary).with('s', :name, :name, dictionary) + + subject.lookup_for :name + end + + it 'defines an case insensitive id mapping' do + DataImport::CaseIgnoringDictionary.should_receive(:new).and_return(dictionary) + container.should_receive(:add_dictionary).with('s', :name, :name, dictionary) + + subject.lookup_for :name, :ignore_case => true + end + end end diff --git a/spec/unit/data-import/dsl_spec.rb b/spec/unit/data-import/dsl_spec.rb index 06a4d0a..c1c3f51 100644 --- a/spec/unit/data-import/dsl_spec.rb +++ b/spec/unit/data-import/dsl_spec.rb @@ -64,13 +64,17 @@ end describe "#import" do + let(:definition) { stub } + let(:container) { stub } + it "adds a new import config to the import" do subject.stub(:source_database).and_return { nil } subject.stub(:target_database).and_return { nil } - definition = stub - DataImport::Definition::Simple.should_receive(:new).with('Import 5', nil, nil).and_return(definition) + DataImport::Definition::Simple.should_receive(:new).with('Import 5', nil, nil, container).and_return(definition) plan.should_receive(:add_definition).with(definition) + plan.should_receive(:id_mapping_container).any_number_of_times.and_return(container) + subject.import('Import 5') {} end @@ -78,9 +82,9 @@ subject.stub(:source_database).and_return { :source } subject.stub(:target_database).and_return { :target } - definition = stub - DataImport::Definition::Simple.should_receive(:new).with('a', :source, :target).and_return(definition) + DataImport::Definition::Simple.should_receive(:new).with('a', :source, :target, container).and_return(definition) plan.should_receive(:add_definition).with(definition) + plan.should_receive(:id_mapping_container).any_number_of_times.and_return(container) subject.import('a') {} end @@ -91,10 +95,10 @@ my_block = lambda {} import_dsl = stub - definition = stub DataImport::Definition::Simple.should_receive(:new).with(any_args).and_return(definition) plan.should_receive(:add_definition).with(definition) - DataImport::Dsl::Import.should_receive(:new).with(definition).and_return(import_dsl) + plan.should_receive(:id_mapping_container).any_number_of_times.and_return(container) + DataImport::Dsl::Import.should_receive(:new).with(definition, container).and_return(import_dsl) import_dsl.should_receive(:instance_eval).with(&my_block) subject.import 'name', &my_block @@ -103,13 +107,15 @@ describe "#script" do let(:definition) { stub } + let(:container) { stub } it "adds a new script config to the import" do subject.stub(:source_database).and_return { nil } subject.stub(:target_database).and_return { nil } - DataImport::Definition::Script.should_receive(:new).with('Script', nil, nil).and_return(definition) + DataImport::Definition::Script.should_receive(:new).with('Script', nil, nil, container).and_return(definition) plan.should_receive(:add_definition).with(definition) + plan.should_receive(:id_mapping_container).any_number_of_times.and_return(container) subject.script('Script') {} end @@ -117,8 +123,9 @@ subject.stub(:source_database).and_return { :source } subject.stub(:target_database).and_return { :target } - DataImport::Definition::Script.should_receive(:new).with('a', :source, :target).and_return(definition) + DataImport::Definition::Script.should_receive(:new).with('a', :source, :target, container).and_return(definition) plan.should_receive(:add_definition).with(definition) + plan.should_receive(:id_mapping_container).any_number_of_times.and_return(container) subject.script('a') {} end @@ -131,7 +138,8 @@ script_dsl = stub DataImport::Definition::Script.should_receive(:new).with(any_args).and_return(definition) plan.should_receive(:add_definition).with(definition) - DataImport::Dsl::Script.should_receive(:new).with(definition).and_return(script_dsl) + plan.should_receive(:id_mapping_container).any_number_of_times.and_return(container) + DataImport::Dsl::Script.should_receive(:new).with(definition, container).and_return(script_dsl) script_dsl.should_receive(:instance_eval).with(&my_block) subject.script 'name', &my_block diff --git a/spec/unit/data-import/execution_plan_spec.rb b/spec/unit/data-import/execution_plan_spec.rb index 8dffe39..2458400 100644 --- a/spec/unit/data-import/execution_plan_spec.rb +++ b/spec/unit/data-import/execution_plan_spec.rb @@ -11,6 +11,12 @@ plan.definitions.should == definitions end + it 'can be created with an existing id mapping container' do + container = stub + plan = DataImport::ExecutionPlan.new([], container) + plan.id_mapping_container.should == container + end + it 'raises an error when a non-existing definition is fetched' do lambda do subject.definition('I-do-not-exist') diff --git a/spec/unit/data-import/runner_spec.rb b/spec/unit/data-import/runner_spec.rb index 9dacc73..f2a909e 100644 --- a/spec/unit/data-import/runner_spec.rb +++ b/spec/unit/data-import/runner_spec.rb @@ -11,9 +11,9 @@ def finish; end end context 'with simple definitions' do - let(:people) { DataImport::Definition.new('People', 'tblPerson', 'people') } - let(:animals) { DataImport::Definition.new('Animals', 'tblAnimal', 'animals') } - let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles') } + let(:people) { DataImport::Definition.new('People', 'tblPerson', 'people', nil) } + let(:animals) { DataImport::Definition.new('Animals', 'tblAnimal', 'animals', nil) } + let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles', nil) } let(:plan) { DataImport::ExecutionPlan.new } before do plan.add_definition(articles) From e96396972a853ead318156c2c7ad106585610f0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Sta=CC=88mpfli?= Date: Mon, 15 Oct 2012 09:05:06 +0200 Subject: [PATCH 2/6] refactoring: deprecated method should not be used anymore --- lib/data-import/definition/simple/importer.rb | 2 +- lib/data-import/execution_context.rb | 4 ++++ .../definition/simple/importer_spec.rb | 17 +++++++++-------- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/data-import/definition/simple/importer.rb b/lib/data-import/definition/simple/importer.rb index 9285481..3ece8c3 100644 --- a/lib/data-import/definition/simple/importer.rb +++ b/lib/data-import/definition/simple/importer.rb @@ -36,7 +36,7 @@ def import_row(row) if row_valid?(row_context) new_id = @definition.writer.write_row(mapped_row) - @definition.row_imported(new_id, row) + @context.id_mapping_container.update_dictionaries(@definition.name, new_id, row) @definition.after_row_blocks.each do |block| row_context.instance_exec(row_context, row, mapped_row, &block) diff --git a/lib/data-import/execution_context.rb b/lib/data-import/execution_context.rb index 47e95eb..5faed48 100644 --- a/lib/data-import/execution_context.rb +++ b/lib/data-import/execution_context.rb @@ -32,6 +32,10 @@ def id_mapping_for(definition_name, mapping_name) @execution_plan.id_mapping_container.fetch(definition_name, mapping_name) end + def id_mapping_container + @execution_plan.id_mapping_container + end + class Proxy def initialize(context) @context = context diff --git a/spec/unit/data-import/definition/simple/importer_spec.rb b/spec/unit/data-import/definition/simple/importer_spec.rb index d4d4663..9170150 100644 --- a/spec/unit/data-import/definition/simple/importer_spec.rb +++ b/spec/unit/data-import/definition/simple/importer_spec.rb @@ -5,10 +5,10 @@ let(:source) { stub } let(:target) { stub } let(:other_definition) { DataImport::Definition::Simple.new 'C', source, target, nil } - let(:dictionary) { stub } - let(:definition) { DataImport::Definition::Simple.new 'A', source, target, dictionary } + let(:id_mapping_container) { mock('Id mapping container') } + let(:definition) { DataImport::Definition::Simple.new 'A', source, target, id_mapping_container } let(:progress_reporter) { mock('ProgressReporter') } - let(:context) { mock('Context', :name => 'A', :progress_reporter => progress_reporter) } + let(:context) { mock('Context', :name => 'A', :progress_reporter => progress_reporter, :id_mapping_container => id_mapping_container) } before { context.stub(:definition).with('C').and_return(other_definition) } subject { described_class.new(context, definition) } @@ -81,7 +81,7 @@ subject.should_receive(:map_row).with(instance_of(DataImport::Definition::Simple::Context), {:id => 1}).and_return({:new_id => 1}) writer.should_receive(:write_row).any_number_of_times - dictionary.should_receive(:update_dictionaries) + id_mapping_container.should_receive(:update_dictionaries) subject.import_row(:id => 1) validated_mapped_rows.should == [{:new_id => 1}] @@ -112,7 +112,7 @@ subject.should_receive(:map_row).with(instance_of(DataImport::Definition::Simple::Context), {:id => 1}).and_return({:new_id => 1}) subject.should_receive(:map_row).with(instance_of(DataImport::Definition::Simple::Context), {:id => 2}).and_return({:new_id => 2}) writer.should_receive(:write_row).any_number_of_times - dictionary.should_receive(:update_dictionaries).twice + id_mapping_container.should_receive(:update_dictionaries).twice subject.import_row(:id => 1) subject.import_row(:id => 2) @@ -126,7 +126,8 @@ let(:id_mapping) { mock } let(:name_mapping) { mock } let(:mappings) { [id_mapping, name_mapping] } - let(:definition) { stub(:mappings => mappings, + let(:definition) { stub(:name => 'A', + :mappings => mappings, :writer => writer, :after_row_blocks => [], :row_validation_blocks => []) } @@ -150,13 +151,13 @@ it "executes the insertion" do writer.should_receive(:write_row).with({:id => 1}) - definition.stub(:row_imported) + id_mapping_container.should_receive(:update_dictionaries) subject.import_row(row) end it "adds the generated id to the id mapping of the definition" do definition.writer.stub(:write_row).and_return(15) - definition.should_receive(:row_imported).with(15, {:id => 1}) + id_mapping_container.should_receive(:update_dictionaries).with('A', 15, {:id => 1}) subject.import_row(:id => 1) end end From 8e5e43356c113225c09ae6added4ba201892fdc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Sta=CC=88mpfli?= Date: Wed, 17 Oct 2012 08:49:18 +0200 Subject: [PATCH 3/6] load and export mapping data in a native format --- .../definition/id_mapping_container.rb | 34 ++++++++++++++++++- lib/data-import/dictionary.rb | 4 +++ .../definition/id_mapping_container_spec.rb | 30 ++++++++++++++++ spec/unit/data-import/dictionary_spec.rb | 5 +++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/lib/data-import/definition/id_mapping_container.rb b/lib/data-import/definition/id_mapping_container.rb index 7049181..a7c8598 100644 --- a/lib/data-import/definition/id_mapping_container.rb +++ b/lib/data-import/definition/id_mapping_container.rb @@ -11,12 +11,17 @@ def add_dictionary(definition_name, mapping_name, attribute, dictionary) def fetch(definition_name, mapping_name) if has_dictionary_for?(definition_name, mapping_name) - @mapping_configs[definition_name].detect {|config| config.name == mapping_name}.dictionary + fetch_config(definition_name, mapping_name).dictionary else raise MissingIdMappingError.new(mapping_name) end end + def fetch_config(definition_name, mapping_name) + @mapping_configs[definition_name].detect {|config| config.name == mapping_name} + end + private :fetch_config + def has_dictionary_for?(definition_name, mapping_name) @mapping_configs[definition_name].any? {|config| config.name == mapping_name} end @@ -28,6 +33,23 @@ def update_dictionaries(definition_name, new_id, row) end end + def to_hash + @mapping_configs.each_with_object({}) do |(definition_name, configs), result| + result[definition_name] = configs.map(&:to_hash) + end + end + + def load(mapping_data) + mapping_data.each do |definition_name, configs| + configs.each do |config_data| + config = fetch_config(definition_name, config_data[:name]) + if config.present? && config.attribute == config_data[:attribute] + config.load(config_data[:mappings]) + end + end + end + end + class IdMappingConfig attr_reader :name, :attribute, :dictionary @@ -36,6 +58,16 @@ def initialize(name, attribute, dictionary) @attribute = attribute.to_sym @dictionary = dictionary end + + def to_hash + {:name => name, :attribute => attribute, :mappings => dictionary.to_hash} + end + + def load(mappings) + mappings.each do |key, value| + dictionary.add(key, value) + end + end end end end diff --git a/lib/data-import/dictionary.rb b/lib/data-import/dictionary.rb index 56ce9f1..229e506 100644 --- a/lib/data-import/dictionary.rb +++ b/lib/data-import/dictionary.rb @@ -11,6 +11,10 @@ def add(key, value) def lookup(key) @mappings[key] end + + def to_hash + @mappings + end end class CaseIgnoringDictionary < Dictionary diff --git a/spec/unit/data-import/definition/id_mapping_container_spec.rb b/spec/unit/data-import/definition/id_mapping_container_spec.rb index c07b6cd..bd03830 100644 --- a/spec/unit/data-import/definition/id_mapping_container_spec.rb +++ b/spec/unit/data-import/definition/id_mapping_container_spec.rb @@ -31,5 +31,35 @@ subject.update_dictionaries('Animals', 5, {:name => 'Lion', :continent => 'Africa'}) end + + it 'puts the dictionaries into a class independent format' do + name_dictionary.should_receive(:to_hash).and_return({1 => 7}) + tag_dictionary.should_receive(:to_hash).and_return({6 => 2}) + + subject.to_hash.should == { + 'Animals' => [ + {:name => 'name', + :attribute => :name, + :mappings => {1 => 7}}, + {:name => 'tagged animal', + :attribute => :tag, + :mappings => {6 => 2}} + ]} + end + + it 'loads the data in the exported format again' do + name_dictionary.should_receive(:add).with(7, 8) + tag_dictionary.should_not_receive(:add) + + subject.load({ + 'Animals' => [ + {:name => 'name', + :attribute => :name, + :mappings => {7 => 8}}, + {:name => 'tagged animal', + :attribute => :chip_id, + :mappings => {6 => 3}} + ]}) + end end end diff --git a/spec/unit/data-import/dictionary_spec.rb b/spec/unit/data-import/dictionary_spec.rb index 2b2882f..aefc393 100644 --- a/spec/unit/data-import/dictionary_spec.rb +++ b/spec/unit/data-import/dictionary_spec.rb @@ -6,6 +6,11 @@ subject.add('leo', 'lion') subject.lookup('leo').should == 'lion' end + + it 'has a hash representation of the data' do + subject.add('chuck norris', 'strong') + subject.to_hash.should == {'chuck norris' => 'strong'} + end end describe DataImport::CaseIgnoringDictionary do From 316706022a35608bc2f012e7ba606e3b8e1b5ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Sta=CC=88mpfli?= Date: Mon, 29 Oct 2012 08:44:43 +0100 Subject: [PATCH 4/6] ability to clear dictionaries --- lib/data-import/definition/id_mapping_container.rb | 14 ++++++++++++++ lib/data-import/dictionary.rb | 8 ++++++++ .../definition/id_mapping_container_spec.rb | 13 +++++++++++-- spec/unit/data-import/dictionary_spec.rb | 12 ++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/lib/data-import/definition/id_mapping_container.rb b/lib/data-import/definition/id_mapping_container.rb index a7c8598..cec0fe4 100644 --- a/lib/data-import/definition/id_mapping_container.rb +++ b/lib/data-import/definition/id_mapping_container.rb @@ -33,6 +33,14 @@ def update_dictionaries(definition_name, new_id, row) end end + def clear + @mapping_configs.each do |_name, configs| + configs.each do |config| + config.clear + end + end + end + def to_hash @mapping_configs.each_with_object({}) do |(definition_name, configs), result| result[definition_name] = configs.map(&:to_hash) @@ -40,6 +48,8 @@ def to_hash end def load(mapping_data) + clear + mapping_data.each do |definition_name, configs| configs.each do |config_data| config = fetch_config(definition_name, config_data[:name]) @@ -59,6 +69,10 @@ def initialize(name, attribute, dictionary) @dictionary = dictionary end + def clear + dictionary.clear + end + def to_hash {:name => name, :attribute => attribute, :mappings => dictionary.to_hash} end diff --git a/lib/data-import/dictionary.rb b/lib/data-import/dictionary.rb index 229e506..5bf3cc6 100644 --- a/lib/data-import/dictionary.rb +++ b/lib/data-import/dictionary.rb @@ -15,6 +15,14 @@ def lookup(key) def to_hash @mappings end + + def empty? + @mappings.empty? + end + + def clear + @mappings.clear + end end class CaseIgnoringDictionary < Dictionary diff --git a/spec/unit/data-import/definition/id_mapping_container_spec.rb b/spec/unit/data-import/definition/id_mapping_container_spec.rb index bd03830..41e2042 100644 --- a/spec/unit/data-import/definition/id_mapping_container_spec.rb +++ b/spec/unit/data-import/definition/id_mapping_container_spec.rb @@ -32,6 +32,13 @@ subject.update_dictionaries('Animals', 5, {:name => 'Lion', :continent => 'Africa'}) end + it 'clear all mapping data' do + name_dictionary.should_receive(:clear) + tag_dictionary.should_receive(:clear) + + subject.clear + end + it 'puts the dictionaries into a class independent format' do name_dictionary.should_receive(:to_hash).and_return({1 => 7}) tag_dictionary.should_receive(:to_hash).and_return({6 => 2}) @@ -47,8 +54,10 @@ ]} end - it 'loads the data in the exported format again' do - name_dictionary.should_receive(:add).with(7, 8) + it 'purges and loads the dictionaries from the exported format' do + name_dictionary.should_receive(:clear).ordered + tag_dictionary.should_receive(:clear).ordered + name_dictionary.should_receive(:add).with(7, 8).ordered tag_dictionary.should_not_receive(:add) subject.load({ diff --git a/spec/unit/data-import/dictionary_spec.rb b/spec/unit/data-import/dictionary_spec.rb index aefc393..816d816 100644 --- a/spec/unit/data-import/dictionary_spec.rb +++ b/spec/unit/data-import/dictionary_spec.rb @@ -11,6 +11,18 @@ subject.add('chuck norris', 'strong') subject.to_hash.should == {'chuck norris' => 'strong'} end + + it 'checks whether a dictionary is empty' do + subject.should be_empty + subject.add('I', 'rule') + subject.should_not be_empty + end + + it 'clear all data' do + subject.add('I', 'rule') + subject.clear + subject.should be_empty + end end describe DataImport::CaseIgnoringDictionary do From c4b8d110c4ba2b396ea9d6ea19500cbd102f3d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Sta=CC=88mpfli?= Date: Mon, 29 Oct 2012 08:46:02 +0100 Subject: [PATCH 5/6] resume a partial import --- .gitignore | 4 +- lib/data-import.rb | 3 + lib/data-import/full_migration.rb | 15 ++++ lib/data-import/migration_strategy.rb | 48 +++++++++++++ lib/data-import/partial_migration.rb | 33 +++++++++ lib/data-import/runner.rb | 20 ++---- spec/acceptance/resume_import_spec.rb | 59 +++++++++++++++ spec/unit/data-import/full_migration_spec.rb | 43 +++++++++++ .../data-import/partial_migration_spec.rb | 71 +++++++++++++++++++ spec/unit/data-import/runner_spec.rb | 43 ++++------- 10 files changed, 296 insertions(+), 43 deletions(-) create mode 100644 lib/data-import/full_migration.rb create mode 100644 lib/data-import/migration_strategy.rb create mode 100644 lib/data-import/partial_migration.rb create mode 100644 spec/acceptance/resume_import_spec.rb create mode 100644 spec/unit/data-import/full_migration_spec.rb create mode 100644 spec/unit/data-import/partial_migration_spec.rb diff --git a/.gitignore b/.gitignore index adbca9d..be48240 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ Gemfile.lock pkg/* reports/* bin/* -*.log \ No newline at end of file +*.log +.import_mappings +.import_definitions \ No newline at end of file diff --git a/lib/data-import.rb b/lib/data-import.rb index 19d58e1..9b66b77 100644 --- a/lib/data-import.rb +++ b/lib/data-import.rb @@ -9,6 +9,9 @@ require 'data-import/dependency_resolver' require 'data-import/execution_context' require 'data-import/runner' +require 'data-import/migration_strategy' +require 'data-import/full_migration' +require 'data-import/partial_migration' require 'data-import/execution_plan' require 'data-import/dsl' require 'data-import/sequel/dataset' diff --git a/lib/data-import/full_migration.rb b/lib/data-import/full_migration.rb new file mode 100644 index 0000000..dd70d58 --- /dev/null +++ b/lib/data-import/full_migration.rb @@ -0,0 +1,15 @@ +module DataImport + class FullMigration < MigrationStrategy + def load_completed_definitions + [] + end + + def load_id_mappings + {} + end + + def definitions_to_migrate + resolved_plan.definitions + end + end +end diff --git a/lib/data-import/migration_strategy.rb b/lib/data-import/migration_strategy.rb new file mode 100644 index 0000000..636dae0 --- /dev/null +++ b/lib/data-import/migration_strategy.rb @@ -0,0 +1,48 @@ +module DataImport + class MigrationStrategy + attr_reader :completed_definitions, :settings_storage, :resolved_plan + + def initialize(plan, options = {}, progress_reporter = ProgressBar, settings_store = SettingsStore) + @plan = plan + @progress_reporter = progress_reporter + @settings_storage = settings_store.new + + dependency_resolver = DependencyResolver.new(@plan) + @resolved_plan = dependency_resolver.resolve(:run_only => options[:only]) + @resolved_plan.id_mapping_container.load(load_id_mappings) + + @completed_definitions = load_completed_definitions + end + + def run + definitions_to_migrate.each do |definition| + run_definition(definition) + + completed_definitions << definition.name + settings_storage.save(:completed_definitions => completed_definitions, + :id_mappings => resolved_plan.id_mapping_container.to_hash) + end + end + + def run_definition(definition) + bar = @progress_reporter.new(definition.name, definition.total_steps_required) + + DataImport.logger.info "Starting to import \"#{definition.name}\"" + context = ExecutionContext.new(resolved_plan, definition, bar) + definition.run context + + bar.finish + end + protected :run_definition + + def store_settings(completed_definitions, id_mapping_container) + File.open('.import_definitions', 'w') do |f| + f << Marshal.dump(completed_definitions) + end + File.open('.import_mappings', 'w') do |f| + f << Marshal.dump(id_mapping_container.to_hash) + end + end + protected :store_settings + end +end diff --git a/lib/data-import/partial_migration.rb b/lib/data-import/partial_migration.rb new file mode 100644 index 0000000..6a1c84a --- /dev/null +++ b/lib/data-import/partial_migration.rb @@ -0,0 +1,33 @@ +module DataImport + class PartialMigration < MigrationStrategy + def load_completed_definitions + settings_storage.load[:completed_definitions] || [] + end + + def load_id_mappings + settings_storage.load[:id_mappings] || {} + end + + def definitions_to_migrate + resolved_plan.definitions.reject {|definition| completed_definitions.include?(definition.name)} + end + + def load_definitions_settings + if File.exist? '.import_definitions' + Marshal.load(File.new('.import_definitions')) + else + [] + end + end + private :load_definitions_settings + + def load_mappings_settings + if File.exist? '.import_mappings' + Marshal.load(File.new('.import_mappings')) + else + {} + end + end + private :load_mappings_settings + end +end diff --git a/lib/data-import/runner.rb b/lib/data-import/runner.rb index f81f7b7..05e5a27 100644 --- a/lib/data-import/runner.rb +++ b/lib/data-import/runner.rb @@ -1,23 +1,17 @@ module DataImport class Runner - def initialize(plan, progress_reporter = ProgressBar) + def initialize(plan) @plan = plan - @progress_reporter = progress_reporter end def run(options = {}) - dependency_resolver = DependencyResolver.new(@plan) - resolved_plan = dependency_resolver.resolve(:run_only => options[:only]) - resolved_plan.definitions.each do |definition| - bar = @progress_reporter.new(definition.name, definition.total_steps_required) - - DataImport.logger.info "Starting to import \"#{definition.name}\"" - context = ExecutionContext.new(resolved_plan, definition, bar) - definition.run context - - bar.finish - end + strategy = if options[:partial] + PartialMigration.new(@plan, options) + else + FullMigration.new(@plan, options) + end + strategy.run end end diff --git a/spec/acceptance/resume_import_spec.rb b/spec/acceptance/resume_import_spec.rb new file mode 100644 index 0000000..ec443e5 --- /dev/null +++ b/spec/acceptance/resume_import_spec.rb @@ -0,0 +1,59 @@ +require 'acceptance/spec_helper' + +describe 'resume import' do + + in_memory_mapping do + FileUtils.rm '.Drivers', :force => true + FileUtils.rm '.Cars', :force => true + + import 'Drivers' do + from 'DriverData' + to 'drivers' + + lookup_for :id, :column => :ID + + mapping :ID => :id + end + + import 'Cars' do + dependencies 'Drivers' + + from 'CarData' + to 'cars' + + reference 'Drivers', :DriverID => :driver_id + + mapping :ID => :id + end + end + + database_setup do + source.create_table :CarData do + primary_key :ID + Integer :DriverID + end + source.create_table :DriverData do + primary_key :ID + end + + target.create_table :cars do + primary_key :id + Integer :driver_id + end + target.create_table :drivers do + primary_key :id + end + + source[:DriverData].insert(:ID => 1) + source[:CarData].insert(:ID => 1, :DriverID => 1) + end + + it 'resumes the import where it stopped the last time' do + DataImport.run_plan!(plan, :only => ['Drivers']) + + DataImport.run_plan!(plan, :only => ['Cars'], :partial => true) + + target_database[:drivers].count.should == 1 + target_database[:cars].to_a.should == [{:id => 1, :driver_id => 1}] + end +end diff --git a/spec/unit/data-import/full_migration_spec.rb b/spec/unit/data-import/full_migration_spec.rb new file mode 100644 index 0000000..661e27b --- /dev/null +++ b/spec/unit/data-import/full_migration_spec.rb @@ -0,0 +1,43 @@ +require 'unit/spec_helper' + +describe DataImport::FullMigration do + + let(:mock_progress_class) do + Class.new do + def initialize(name, total_steps); end + + def finish; end + end + end + + context 'with simple definitions' do + let(:people) { DataImport::Definition.new('People', 'tblPerson', 'people', nil) } + let(:animals) { DataImport::Definition.new('Animals', 'tblAnimal', 'animals', nil) } + let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles', nil) } + let(:plan) { DataImport::ExecutionPlan.new } + before do + plan.add_definition(articles) + plan.add_definition(people) + plan.add_definition(animals) + end + + it 'runs a set of definitions' do + subject = described_class.new(plan, {}, mock_progress_class) + + articles.should_receive(:run) + people.should_receive(:run) + animals.should_receive(:run) + + subject.run + end + + it ":only limits the definitions, which will be run" do + subject = described_class.new(plan, {:only => ['People', 'Articles']}, mock_progress_class) + + people.should_receive(:run) + articles.should_receive(:run) + + subject.run + end + end +end diff --git a/spec/unit/data-import/partial_migration_spec.rb b/spec/unit/data-import/partial_migration_spec.rb new file mode 100644 index 0000000..ecddeb0 --- /dev/null +++ b/spec/unit/data-import/partial_migration_spec.rb @@ -0,0 +1,71 @@ +require 'unit/spec_helper' + +describe DataImport::PartialMigration do + + let(:mock_progress_class) do + Class.new do + def initialize(name, total_steps); end + + def finish; end + end + end + + context 'with simple definitions' do + let(:people) { DataImport::Definition.new('People', 'tblPerson', 'people', nil) } + let(:animals) { DataImport::Definition.new('Animals', 'tblAnimal', 'animals', nil) } + let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles', nil) } + let(:plan) { DataImport::ExecutionPlan.new } + before do + plan.add_definition(articles) + plan.add_definition(people) + plan.add_definition(animals) + + File.delete('.import_definitions') if File.exist?('.import_definitions') + File.delete('.import_mappings') if File.exist?('.import_mappings') + end + + it 'runs a set of definitions' do + subject = described_class.new(plan, {}, mock_progress_class) + + articles.should_receive(:run) + people.should_receive(:run) + animals.should_receive(:run) + + subject.run + end + + it ":only limits the definitions, which will be run" do + subject = described_class.new(plan, {:only => ['People', 'Articles']}, mock_progress_class) + + people.should_receive(:run) + articles.should_receive(:run) + + subject.run + end + end + + context 'with already run definitions' do + let(:animals) { DataImport::Definition.new('Animals', 'tblAnimal', 'animals', nil) } + let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles', nil) } + let(:plan) { DataImport::ExecutionPlan.new } + + before do + plan.add_definition(articles) + plan.add_definition(animals) + + File.open('.import_definitions', 'w') do |f| + f << Marshal.dump(['Animals']) + end + File.delete('.import_mappings') if File.exist?('.import_mappings') + end + + subject { described_class.new(plan, {}, mock_progress_class) } + + it 'skips already run definitions' do + articles.should_receive(:run) + animals.should_not_receive(:run) + + subject.run + end + end +end diff --git a/spec/unit/data-import/runner_spec.rb b/spec/unit/data-import/runner_spec.rb index f2a909e..a4a3ebc 100644 --- a/spec/unit/data-import/runner_spec.rb +++ b/spec/unit/data-import/runner_spec.rb @@ -1,41 +1,26 @@ require 'unit/spec_helper' describe DataImport::Runner do + let(:plan) { stub } + subject { described_class.new(plan) } - let(:mock_progress_class) do - Class.new do - def initialize(name, total_steps); end - - def finish; end - end - end - - context 'with simple definitions' do - let(:people) { DataImport::Definition.new('People', 'tblPerson', 'people', nil) } - let(:animals) { DataImport::Definition.new('Animals', 'tblAnimal', 'animals', nil) } - let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles', nil) } - let(:plan) { DataImport::ExecutionPlan.new } - before do - plan.add_definition(articles) - plan.add_definition(people) - plan.add_definition(animals) - end - - subject { DataImport::Runner.new(plan, mock_progress_class) } - - it 'runs a set of definitions' do - articles.should_receive(:run) - people.should_receive(:run) - animals.should_receive(:run) + context 'complete import' do + it 'executes the plan with the full migration strategy' do + strategy = stub + DataImport::FullMigration.should_receive(:new).with(plan, {}).and_return(strategy) + strategy.should_receive(:run) subject.run end + end - it ":only limits the definitions, which will be run" do - people.should_receive(:run) - articles.should_receive(:run) + context 'partial import' do + it 'executes the plan with the partial migration strategy' do + strategy = stub + DataImport::PartialMigration.should_receive(:new).with(plan, :partial => true).and_return(strategy) + strategy.should_receive(:run) - subject.run :only => ['People', 'Articles'] + subject.run :partial => true end end end From 9a6eb46beb49e843c94c908fd591362be33c223c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Sta=CC=88mpfli?= Date: Mon, 29 Oct 2012 16:27:25 +0100 Subject: [PATCH 6/6] refactoring: settings store is a separate concept --- .gitignore | 3 +- lib/data-import.rb | 1 + lib/data-import/migration_strategy.rb | 10 ----- lib/data-import/partial_migration.rb | 18 --------- lib/data-import/settings_store.rb | 19 ++++++++++ spec/unit/data-import/full_migration_spec.rb | 12 +++++- .../data-import/partial_migration_spec.rb | 37 ++++++++++++++----- spec/unit/data-import/settings_store_spec.rb | 23 ++++++++++++ 8 files changed, 82 insertions(+), 41 deletions(-) create mode 100644 lib/data-import/settings_store.rb create mode 100644 spec/unit/data-import/settings_store_spec.rb diff --git a/.gitignore b/.gitignore index be48240..da5d910 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,4 @@ pkg/* reports/* bin/* *.log -.import_mappings -.import_definitions \ No newline at end of file +.import_settings diff --git a/lib/data-import.rb b/lib/data-import.rb index 9b66b77..2c3f90c 100644 --- a/lib/data-import.rb +++ b/lib/data-import.rb @@ -12,6 +12,7 @@ require 'data-import/migration_strategy' require 'data-import/full_migration' require 'data-import/partial_migration' +require 'data-import/settings_store' require 'data-import/execution_plan' require 'data-import/dsl' require 'data-import/sequel/dataset' diff --git a/lib/data-import/migration_strategy.rb b/lib/data-import/migration_strategy.rb index 636dae0..de7a289 100644 --- a/lib/data-import/migration_strategy.rb +++ b/lib/data-import/migration_strategy.rb @@ -34,15 +34,5 @@ def run_definition(definition) bar.finish end protected :run_definition - - def store_settings(completed_definitions, id_mapping_container) - File.open('.import_definitions', 'w') do |f| - f << Marshal.dump(completed_definitions) - end - File.open('.import_mappings', 'w') do |f| - f << Marshal.dump(id_mapping_container.to_hash) - end - end - protected :store_settings end end diff --git a/lib/data-import/partial_migration.rb b/lib/data-import/partial_migration.rb index 6a1c84a..8f45d4c 100644 --- a/lib/data-import/partial_migration.rb +++ b/lib/data-import/partial_migration.rb @@ -11,23 +11,5 @@ def load_id_mappings def definitions_to_migrate resolved_plan.definitions.reject {|definition| completed_definitions.include?(definition.name)} end - - def load_definitions_settings - if File.exist? '.import_definitions' - Marshal.load(File.new('.import_definitions')) - else - [] - end - end - private :load_definitions_settings - - def load_mappings_settings - if File.exist? '.import_mappings' - Marshal.load(File.new('.import_mappings')) - else - {} - end - end - private :load_mappings_settings end end diff --git a/lib/data-import/settings_store.rb b/lib/data-import/settings_store.rb new file mode 100644 index 0000000..2c60287 --- /dev/null +++ b/lib/data-import/settings_store.rb @@ -0,0 +1,19 @@ +module DataImport + class SettingsStore + SETTINGS_FILE = '.import_settings' + + def save(data) + File.open(SETTINGS_FILE, 'w') do |f| + f << Marshal.dump(data) + end + end + + def load + if File.exist?(SETTINGS_FILE) + Marshal.load(File.new(SETTINGS_FILE)) + else + {} + end + end + end +end diff --git a/spec/unit/data-import/full_migration_spec.rb b/spec/unit/data-import/full_migration_spec.rb index 661e27b..5c67ee6 100644 --- a/spec/unit/data-import/full_migration_spec.rb +++ b/spec/unit/data-import/full_migration_spec.rb @@ -10,11 +10,19 @@ def finish; end end end + let(:mock_settings_store) do + Class.new do + def save(data) + end + end + end + context 'with simple definitions' do let(:people) { DataImport::Definition.new('People', 'tblPerson', 'people', nil) } let(:animals) { DataImport::Definition.new('Animals', 'tblAnimal', 'animals', nil) } let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles', nil) } let(:plan) { DataImport::ExecutionPlan.new } + before do plan.add_definition(articles) plan.add_definition(people) @@ -22,7 +30,7 @@ def finish; end end it 'runs a set of definitions' do - subject = described_class.new(plan, {}, mock_progress_class) + subject = described_class.new(plan, {}, mock_progress_class, mock_settings_store) articles.should_receive(:run) people.should_receive(:run) @@ -32,7 +40,7 @@ def finish; end end it ":only limits the definitions, which will be run" do - subject = described_class.new(plan, {:only => ['People', 'Articles']}, mock_progress_class) + subject = described_class.new(plan, {:only => ['People', 'Articles']}, mock_progress_class, mock_settings_store) people.should_receive(:run) articles.should_receive(:run) diff --git a/spec/unit/data-import/partial_migration_spec.rb b/spec/unit/data-import/partial_migration_spec.rb index ecddeb0..f55579a 100644 --- a/spec/unit/data-import/partial_migration_spec.rb +++ b/spec/unit/data-import/partial_migration_spec.rb @@ -15,17 +15,28 @@ def finish; end let(:animals) { DataImport::Definition.new('Animals', 'tblAnimal', 'animals', nil) } let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles', nil) } let(:plan) { DataImport::ExecutionPlan.new } + + let(:mock_settings_store) do + Class.new do + def load + {} + end + + def save(data) + end + end + end + before do plan.add_definition(articles) plan.add_definition(people) plan.add_definition(animals) - File.delete('.import_definitions') if File.exist?('.import_definitions') - File.delete('.import_mappings') if File.exist?('.import_mappings') + File.delete(DataImport::SettingsStore::SETTINGS_FILE) if File.exist?(DataImport::SettingsStore::SETTINGS_FILE) end it 'runs a set of definitions' do - subject = described_class.new(plan, {}, mock_progress_class) + subject = described_class.new(plan, {}, mock_progress_class, mock_settings_store) articles.should_receive(:run) people.should_receive(:run) @@ -35,7 +46,7 @@ def finish; end end it ":only limits the definitions, which will be run" do - subject = described_class.new(plan, {:only => ['People', 'Articles']}, mock_progress_class) + subject = described_class.new(plan, {:only => ['People', 'Articles']}, mock_progress_class, mock_settings_store) people.should_receive(:run) articles.should_receive(:run) @@ -49,17 +60,25 @@ def finish; end let(:articles) { DataImport::Definition.new('Articles', 'tblNewsMessage', 'articles', nil) } let(:plan) { DataImport::ExecutionPlan.new } + let(:mock_settings_store) do + Class.new do + def load + {:completed_definitions => ['Animals']} + end + + def save(data) + end + end + end + before do plan.add_definition(articles) plan.add_definition(animals) - File.open('.import_definitions', 'w') do |f| - f << Marshal.dump(['Animals']) - end - File.delete('.import_mappings') if File.exist?('.import_mappings') + File.delete(DataImport::SettingsStore::SETTINGS_FILE) if File.exist?(DataImport::SettingsStore::SETTINGS_FILE) end - subject { described_class.new(plan, {}, mock_progress_class) } + subject { described_class.new(plan, {}, mock_progress_class, mock_settings_store) } it 'skips already run definitions' do articles.should_receive(:run) diff --git a/spec/unit/data-import/settings_store_spec.rb b/spec/unit/data-import/settings_store_spec.rb new file mode 100644 index 0000000..41f0979 --- /dev/null +++ b/spec/unit/data-import/settings_store_spec.rb @@ -0,0 +1,23 @@ +require 'unit/spec_helper' + +describe DataImport::SettingsStore do + it 'saves and loads the completed definitions' do + subject.save(:completed_definitions => ['animals']) + subject.load[:completed_definitions].should == ['animals'] + end + + it 'saves and loads the id mappings' do + subject.save(:id_mappings => {:mappings => {4 => 7}}) + subject.load[:id_mappings].should == {:mappings => {4 => 7}} + end + + context 'without settings' do + before do + File.delete(DataImport::SettingsStore::SETTINGS_FILE) if File.exist?(DataImport::SettingsStore::SETTINGS_FILE) + end + + it 'loads an empty set' do + subject.load.should == {} + end + end +end