diff --git a/.gitignore b/.gitignore index 9106b2a..39bc02f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ /pkg/ /spec/reports/ /tmp/ +.ruby-gemset +.ruby-version diff --git a/Gemfile b/Gemfile index 9a50d85..2fd5e6f 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + source 'https://rubygems.org' # Specify your gem's dependencies in easy_sax.gemspec diff --git a/Gemfile.lock b/Gemfile.lock index 6a5f71e..c3f23f7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -4,6 +4,7 @@ PATH easy_sax (0.2.0) activesupport (~> 7.0.8) nokogiri (~> 1.16.2) + ox (~> 2.14.18) GEM remote: https://rubygems.org/ @@ -13,10 +14,12 @@ GEM i18n (>= 1.6, < 2) minitest (>= 5.1) tzinfo (~> 2.0) + byebug (11.1.3) coderay (1.1.3) concurrent-ruby (1.2.3) i18n (1.14.1) concurrent-ruby (~> 1.0) + memory_profiler (1.1.0) method_source (1.0.0) minitest (5.20.0) nokogiri (1.16.2-arm64-darwin) @@ -25,9 +28,13 @@ GEM racc (~> 1.4) nokogiri (1.16.2-x86_64-linux) racc (~> 1.4) + ox (2.14.18) pry (0.14.1) coderay (~> 1.1) method_source (~> 1.0) + pry-byebug (3.10.1) + byebug (~> 11.0) + pry (>= 0.13, < 0.15) racc (1.7.3) rake (13.0.6) tzinfo (2.0.6) @@ -37,14 +44,17 @@ PLATFORMS arm64-darwin-21 arm64-darwin-22 arm64-darwin-23 + arm64-darwin-24 x86_64-darwin-20 x86_64-linux DEPENDENCIES bundler (~> 2.3.6) easy_sax! + memory_profiler minitest (~> 5.0) pry + pry-byebug rake BUNDLED WITH diff --git a/README.md b/README.md index 5952a7a..b2f6c41 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,14 @@ # EasySax -EasySax allows you to easily parse large files without the messy syntax needed for working with most Sax parsers. It was inspired after attempting to use [SaxMachine](https://github.com/pauldix/sax-machine) to parse a 500mb XML file that resulted in a huge spike to 2gbs of memory inside a Rails app. EasySax is very lightweight and only stores the element currently being used in memory. 
It also allows you to access parent elements without storing the whole parent tree in memory. Testing with the same file above, the memory stayed constant and it processed the file much faster. EasySax is currently used in production at EasyBroker. +EasySax allows you to easily parse large files without the messy syntax needed +for working with most Sax parsers. It was inspired after attempting to use +[SaxMachine](https://github.com/pauldix/sax-machine) to parse a 500mb XML file +that resulted in a huge spike to 2gbs of memory inside a Rails app. EasySax is +very lightweight and only stores the element currently being used in memory. It +also allows you to access parent elements without storing the whole parent tree +in memory. Testing with the same file above, the memory stayed constant and it +processed the file much faster. EasySax is currently used in production at +EasyBroker. ## Installation @@ -12,14 +20,20 @@ gem 'easy_sax' And then execute: - $ bundle +```shell +bundle +``` Or install it yourself as: - $ gem install easy_sax +```shell +gem install easy_sax +``` ## Usage + Given the following test XML + ```xml @@ -36,8 +50,8 @@ Given the following test XML Test 3 - + @@ -56,6 +70,7 @@ Given the following test XML ``` + You can parse all the property elements with ```ruby @@ -67,15 +82,18 @@ end Outputs -``` +```shell Property id[2] title[Test 2] Property id[3] title[Test 3] Property id[4] title[Test 4] ``` -You can also use the `text_for` method if you prefer to get text elements. `property.text_for(:title)` is the same as `property[:title].text` except it returns nil if the title element doesn't exist. +You can also use the `text_for` method if you prefer to get text elements. +`property.text_for(:title)` is the same as `property[:title].text` except it +returns nil if the title element doesn't exist. 
-If you want to print the property image urls you need to let the parser know that it is an array +If you want to print the property image urls you need to let the parser know +that it is an array ```ruby parser = EasySax.parser(File.open('test.xml')) @@ -87,13 +105,14 @@ end Outputs -``` +```shell Property id[2] images ["http://test.com/1.jpg", "http://test.com/2.jpg"] Property id[3] images ["http://test.com/4.jpg", "http://test.com/5.jpg"] Property id[4] images ["http://test.com/3.jpg", "http://test.com/4.jpg"] ``` -Now for something really cool. If you want the root ancestor use the second param in the `parse_each` block +Now for something really cool. If you want the root ancestor use the second +param in the `parse_each` block ```ruby parser = EasySax.parser(File.open('test.xml')) @@ -104,13 +123,14 @@ end Outputs -``` +```shell Property id[2] agency id[1] Property id[3] agency id[1] Property id[4] agency id[2] ``` -Now maybe you're lazy like me and don't care about the `agencies` element and want the `agency` to be the oldest ancestor. +Now maybe you're lazy like me and don't care about the `agencies` element and +want the `agency` to be the oldest ancestor. ```ruby parser = EasySax.parser(File.open('test.xml')) @@ -121,25 +141,112 @@ end Outputs -``` +```shell Property id[2] agency id[1] Property id[3] agency id[1] Property id[4] agency id[2] ``` -You can also use `ignore` to speed up the parser by allowing it to know that it doesn't need to keep track of the those elements. +You can also use `ignore` to speed up the parser by allowing it to know that it +doesn't need to keep track of those elements. + +## Performance improvement (alpha version) + +There are currently two parser methods. `EasySax.parser` uses Nokogiri and is +well tested in production. The new `ox_parser` method uses Ox and is backward +compatible with the current code and the examples listed in this README. + +Behind the scenes, the improvement comes from replacing Nokogiri with Ox.
+ +### Benchmark setup + +```text +OS: macOS Sequoia 15.1.1 arm64 +Host: MacBook Pro (14-inch, 2021) +Kernel: Darwin 24.1.0 +CPU: Apple M1 Pro (8) @ 3.23 GHz +GPU: Apple M1 Pro (14) @ 1.30 GHz [Integrated] +Memory: 32.00 GiB +ruby 3.3.6 (2024-11-05 revision 75015d4c1f) [arm64-darwin24] +``` + +### Results + +```text +Time Benchmark: + user system total real +Nokogiri: 0.000114 0.000015 0.000129 ( 0.000128) +Ox: 0.000058 0.000002 0.000060 ( 0.000062) + +Memory Benchmark: + +Nokogiri Parser: +Total allocated memory: 22.90625 KB +Total retained memory: 0.0 KB +Total objects allocated: 430 +Total objects retained: 0 + +Ox Parser: +Total allocated memory: 14.984375 KB +Total retained memory: 0.078125 KB +Total objects allocated: 205 +Total objects retained: 2 +``` + +### Performance Conclusion + +The new ox_parser demonstrates significant performance improvements over the +EasySax parser that relies on Nokogiri. Below is a summary of the key metrics: + +1. Execution Time: + + - ox_parser is ~52% faster than EasySax in terms of real execution time. + - Nokogiri: 0.000128 seconds + - Ox: 0.000062 seconds + +2. Memory Usage: + + - Total allocated memory is reduced by ~35% when using ox_parser. + Nokogiri: 22.91 KB + Ox: 14.98 KB + + - Object allocation is reduced by ~52%, making Ox more efficient: + Nokogiri: 430 objects + Ox: 205 objects + +3. Retained Memory: + - While Nokogiri retains 0 KB, ox_parser retains a negligible amount of + 0.078 KB due to its design. However, the overall efficiency in memory + allocation offsets this minor difference. + +### Why Switch to ox_parser? + +- Speed: The ox_parser is approximately 2x faster, ensuring faster XML parsing + for applications with high performance needs. +- Efficiency: Reduces memory usage significantly, benefiting applications + running in constrained environments. +- Backward Compatibility: ox_parser works seamlessly with existing code and + examples listed in this README. 
+ +> [!CAUTION] +> `ox_parser` still needs testing and monitoring in production environments. + ## Development -After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. +After checking out the repo, run `bin/setup` to install dependencies. Then, run +`rake test` to run the tests. You can also run `bin/console` for an interactive +prompt that will allow you to experiment. -To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). +To install this gem onto your local machine, run `bundle exec rake install`. To +release a new version, update the version number in `version.rb`, and then run +`bundle exec rake release`, which will create a git tag for the version, push +git commits and tags, and push the `.gem` file to +[rubygems.org](https://rubygems.org). ## Contributing -Bug reports and pull requests are welcome on GitHub at https://github.com/easybroker/easy_sax. - +Bug reports and pull requests are welcome on GitHub via the [issue tracker](https://github.com/easybroker/easy_sax/issues). ## License The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
- diff --git a/Rakefile b/Rakefile index d6c5113..4caa98e 100644 --- a/Rakefile +++ b/Rakefile @@ -1,10 +1,12 @@ -require "bundler/gem_tasks" -require "rake/testtask" +# frozen_string_literal: true + +require 'bundler/gem_tasks' +require 'rake/testtask' Rake::TestTask.new(:test) do |t| - t.libs << "test" - t.libs << "lib" + t.libs << 'test' + t.libs << 'lib' t.test_files = FileList['test/**/*_test.rb'] end -task :default => :test +task default: :test diff --git a/benchmark/benchmark_parsers.rb b/benchmark/benchmark_parsers.rb new file mode 100644 index 0000000..062cb3d --- /dev/null +++ b/benchmark/benchmark_parsers.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +$LOAD_PATH.unshift File.expand_path('../lib', __dir__) +require 'benchmark' +require 'memory_profiler' +require 'easy_sax' + +TEST_XML = <<~XML + + + Foo + 12345678 + + + Test 2 + + + + + + + + + Bar + + + Test 3 + + + + + + + + +XML + +def create_parser(parser_class) + parser_class.new(StringIO.new(TEST_XML)) +end + +def parse_with_parser(parser) + agencies = [] + parser.parse_each(:agency, ignore: %w[agencies], arrays: %w[properties images]) do |agency| + agencies << agency + end +end + +puts 'Time Benchmark:' +Benchmark.bm(10) do |x| + x.report('Nokogiri:') { parse_with_parser(create_parser(EasySax::Parser)) } + x.report('Ox:') { parse_with_parser(create_parser(EasySax::OxParser)) } +end + +puts "\nMemory Benchmark:" +[[:nokogiri, EasySax::Parser], [:ox, EasySax::OxParser]].each do |name, parser_class| + report = MemoryProfiler.report do + parser = create_parser(parser_class) + parse_with_parser(parser) + end + + puts "\n#{name.capitalize} Parser:" + puts "Total allocated memory: #{report.total_allocated_memsize / 1024.0} KB" + puts "Total retained memory: #{report.total_retained_memsize / 1024.0} KB" + puts "Total objects allocated: #{report.total_allocated}" + puts "Total objects retained: #{report.total_retained}" + # Uncomment the line below for more detailed output: + # 
report.pretty_print(scale_bytes: true) +end diff --git a/bin/console b/bin/console index 6ea7c45..9fd81ca 100755 --- a/bin/console +++ b/bin/console @@ -1,7 +1,8 @@ #!/usr/bin/env ruby +# frozen_string_literal: true -require "bundler/setup" -require "easy_sax" +require 'bundler/setup' +require 'easy_sax' # You can add fixtures and/or initialization code here to make experimenting # with your gem easier. You can also use a different console, if you like. @@ -10,5 +11,5 @@ require "easy_sax" # require "pry" # Pry.start -require "irb" +require 'irb' IRB.start diff --git a/easy_sax.gemspec b/easy_sax.gemspec index 843d2d7..a2c334f 100644 --- a/easy_sax.gemspec +++ b/easy_sax.gemspec @@ -1,29 +1,33 @@ -# coding: utf-8 -lib = File.expand_path('../lib', __FILE__) +# frozen_string_literal: true + +lib = File.expand_path('lib', __dir__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'easy_sax/version' Gem::Specification.new do |spec| - spec.name = "easy_sax" + spec.name = 'easy_sax' spec.version = EasySax::VERSION - spec.authors = ["Eric Northam"] - spec.email = ["eric@easybroker.com"] + spec.authors = ['Eric Northam', 'Jonathan Monsalve'] + spec.email = ['eric@easybroker.com', 'j.jmonsalveg@gmail.com'] - spec.summary = "A simple SAX parser that enables parsing of large files without the messy syntax of typical SAX parsers." - spec.description = "A simple SAX parser that enables parsing of large files without the messy syntax of typical SAX parsers. Currently depends on Nokogiri." - spec.homepage = "https://github.com/easybroker/easy_sax" - spec.license = "MIT" + spec.summary = 'A simple SAX parser that enables parsing of large files without the messy syntax of typical SAX parsers.' + spec.description = 'A simple SAX parser that enables parsing of large files without the messy syntax of typical SAX parsers. Currently depends on Nokogiri|Ox.' 
+ spec.homepage = 'https://github.com/easybroker/easy_sax' + spec.license = 'MIT' spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } - spec.bindir = "exe" + spec.bindir = 'exe' spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } - spec.require_paths = ["lib"] + spec.require_paths = ['lib'] - spec.add_dependency "nokogiri", "~> 1.16.2" - spec.add_dependency "activesupport", "~> 7.0.8" + spec.add_dependency 'activesupport', '~> 7.0.8' + spec.add_dependency 'nokogiri', '~> 1.16.2' + spec.add_dependency 'ox', '~> 2.14.18' - spec.add_development_dependency "bundler", "~> 2.3.6" - spec.add_development_dependency "rake" - spec.add_development_dependency "minitest", "~> 5.0" - spec.add_development_dependency "pry" + spec.add_development_dependency 'bundler', '~> 2.3.6' + spec.add_development_dependency 'memory_profiler' + spec.add_development_dependency 'minitest', '~> 5.0' + spec.add_development_dependency 'pry' + spec.add_development_dependency 'pry-byebug' + spec.add_development_dependency 'rake' end diff --git a/lib/easy_sax.rb b/lib/easy_sax.rb index 2d0b3b2..865ec4a 100644 --- a/lib/easy_sax.rb +++ b/lib/easy_sax.rb @@ -1,22 +1,35 @@ +# frozen_string_literal: true + require 'easy_sax/version' require 'easy_sax/parse_error' require 'easy_sax/simple_element' require 'easy_sax/parser' +require 'easy_sax/ox_parser' -# A simple SAX parser that enables parsing of large files without -# the messy syntax of typical SAX parsers. Currently depends on -# Nokogiri. -# -# Basic Usage: -# EasySax.parser(io).parse_each(target_element, ignore:, array:) -# target_element: is the element you want to parse -# ignore: are elements that will be ignored and not parsed -# arrays: are the elements that should parsed into arrays -# +# Simple SAX parsers that enable parsing of large files without +# the messy syntax of typical SAX parsers. 
# You should use a block which returns the parsed target element # and it's ancestors if it has one. module EasySax def self.parser(io) + # Currently depends on Nokogiri. + # + # Basic Usage: + # EasySax.parser(io).parse_each(target_element, ignore:, arrays:) + # target_element: is the element you want to parse + # ignore: are elements that will be ignored and not parsed + # arrays: are the elements that should be parsed into arrays EasySax::Parser.new(io) end + + def self.ox_parser(io) + # Currently depends on ox. + # + # Basic Usage: + # EasySax.ox_parser(io).parse_each(target_element, ignore:, arrays:) + # target_element: is the element you want to parse + # ignore: are elements that will be ignored and not parsed + # arrays: are the elements that should be parsed into arrays + EasySax::OxParser.new(io) + end end diff --git a/lib/easy_sax/ox_parser.rb b/lib/easy_sax/ox_parser.rb new file mode 100644 index 0000000..01ee244 --- /dev/null +++ b/lib/easy_sax/ox_parser.rb @@ -0,0 +1,92 @@ +require 'active_support/core_ext/object/blank' +require 'ox' + +module EasySax + class OxParser < ::Ox::Sax + attr_reader :io, + :target_element, + :callback, + :ignorable_elements, + :array_elements, + :element_stack + + def initialize(io) + @io = io + end + + def parse_each(target_element, ignore: [], arrays: [], &block) + @target_element = target_element.to_s + @ignorable_elements = validate_array(:ignore, ignore) + @array_elements = validate_array(:arrays, arrays) + @element_stack = [] + @callback = block + Ox.sax_parse(self, @io) + end + + def start_element(name) + return if ignorable_elements.include?(name.to_s) + + parent = element_stack.last + element = EasySax::SimpleElement.new(name.to_s, {}) + + if parent.nil?
+ element_stack << element + else + add_child(parent, element) + end + end + + def text(value) + return unless element_stack.last + + element_stack.last.text ||= '' + element_stack.last.text << value.strip + end + + def attr(name, value) + element_stack.last.attrs[name.to_s] = value + end + + def cdata(string) + text(string) + end + + def end_element(name) + return if ignorable_elements.include?(name.to_s) + + element = element_stack.pop + callback.call element, element_stack.first if name.to_s == target_element + end + + def error(string) + raise EasySax::ParseError, string + end + + private + + def validate_array(field, array) + if array.nil? + [] + elsif array.is_a?(Array) + array.map(&:to_s) + else + raise ArgumentError, ('%s must be an Array' % field) + end + end + + def add_child(parent, element) + if array_elements.include?(element.name) + parent[element.name] = [] + element_stack << parent[element.name] + else + if parent.is_a?(Array) + parent << element + elsif element.name != target_element + parent[element.name] = element + end + + element_stack << element + end + end + end +end diff --git a/lib/easy_sax/parse_error.rb b/lib/easy_sax/parse_error.rb index bb64691..141db0a 100644 --- a/lib/easy_sax/parse_error.rb +++ b/lib/easy_sax/parse_error.rb @@ -1,2 +1,6 @@ -class EasySax::ParseError < StandardError +# frozen_string_literal: true + +module EasySax + class ParseError < StandardError + end end diff --git a/lib/easy_sax/parser.rb b/lib/easy_sax/parser.rb index f6aae44..f09f79e 100644 --- a/lib/easy_sax/parser.rb +++ b/lib/easy_sax/parser.rb @@ -1,89 +1,92 @@ require 'active_support/core_ext/object/blank' require 'nokogiri' -class EasySax::Parser < Nokogiri::XML::SAX::Document - attr_reader :io, - :target_element, - :callback, - :ignorable_elements, - :array_elements, - :element_stack, - :element_text - - def initialize(io) - @io = io - end +module EasySax + class Parser < Nokogiri::XML::SAX::Document + attr_reader :io, + :target_element, + :callback, + 
:ignorable_elements, + :array_elements, + :element_stack, + :element_text + + def initialize(io) + @io = io + end - def parse_each(target_element, ignore: [], arrays: [], &block) - validate_array(:arrays, arrays) - @target_element = target_element.to_s - @ignorable_elements = validate_array(:ignore, ignore) - @array_elements = validate_array(:arrays, arrays) - @element_stack = [] - @callback = block - Nokogiri::XML::SAX::Parser.new(self).parse(io) - end + def parse_each(target_element, ignore: [], arrays: [], &block) + validate_array(:arrays, arrays) + @target_element = target_element.to_s + @ignorable_elements = validate_array(:ignore, ignore) + @array_elements = validate_array(:arrays, arrays) + @element_stack = [] + @callback = block + Nokogiri::XML::SAX::Parser.new(self).parse(io) + end + + def start_element(name, attrs = []) + return if ignorable_elements.include?(name) - def start_element(name, attrs = []) - return if ignorable_elements.include?(name) - @element_text = '' - parent = element_stack.last + @element_text = '' + parent = element_stack.last - if parent.nil? - element_stack << EasySax::SimpleElement.new(name, attrs.to_h) - else - add_child(parent, name, attrs) + if parent.nil? + element_stack << EasySax::SimpleElement.new(name, attrs.to_h) + else + add_child(parent, name, attrs) + end end - end - def characters(string) - @element_text << string if element_text - end + def characters(string) + @element_text << string if element_text + end - def cdata_block(string) - characters(string) - end + def cdata_block(string) + characters(string) + end - def end_element(name) - return if ignorable_elements.include?(name) + def end_element(name) + return if ignorable_elements.include?(name) - element = element_stack.pop - return if element.kind_of?(Array) + element = element_stack.pop + return if element.is_a?(Array) - element.text = element_text.strip if element_text.present? 
- callback.call element, element_stack.first if name == target_element - end + element.text = element_text.strip if element_text.present? + callback.call element, element_stack.first if name == target_element + end - def error(string) - raise EasySax::ParseError.new(string) - end + def error(string) + raise EasySax::ParseError, string + end - private + private - def validate_array(field, array) - if array.nil? - [] - elsif array.kind_of?(Array) - array.map { |element| element.to_s } - else - raise ArgumentError, ("%s must be an Array" % field) + def validate_array(field, array) + if array.nil? + [] + elsif array.is_a?(Array) + array.map(&:to_s) + else + raise ArgumentError, ('%s must be an Array' % field) + end end - end - def add_child(parent, name, attrs) - if array_elements.include?(name) - parent[name] = [] - element_stack << parent[name] - else - element = EasySax::SimpleElement.new(name, attrs.to_h) - - if parent.kind_of?(Array) - parent << element - elsif name != target_element - parent[name] = element - end + def add_child(parent, name, attrs) + if array_elements.include?(name) + parent[name] = [] + element_stack << parent[name] + else + element = EasySax::SimpleElement.new(name, attrs.to_h) - element_stack << element + if parent.is_a?(Array) + parent << element + elsif name != target_element + parent[name] = element + end + + element_stack << element + end end end end diff --git a/lib/easy_sax/simple_element.rb b/lib/easy_sax/simple_element.rb index f860120..78fe07f 100644 --- a/lib/easy_sax/simple_element.rb +++ b/lib/easy_sax/simple_element.rb @@ -1,34 +1,38 @@ +# frozen_string_literal: true + require 'active_support/core_ext/hash/indifferent_access' -class EasySax::SimpleElement - attr_accessor :name, :attrs, :elements, :text +module EasySax + class SimpleElement + attr_accessor :name, :attrs, :elements, :text - def initialize(name, attrs) - @name = name - @attrs = HashWithIndifferentAccess.new(attrs || {}) - @elements = HashWithIndifferentAccess.new - 
end + def initialize(name, attrs) + @name = name + @attrs = HashWithIndifferentAccess.new(attrs || {}) + @elements = HashWithIndifferentAccess.new + end - def [](key) - elements[key] - end + def [](key) + elements[key] + end - def []=(key, value) - elements[key] = value - end + def []=(key, value) + elements[key] = value + end - def text_for(key) - elements[key]&.text - end + def text_for(key) + elements[key]&.text + end - def to_h - {}.tap do |hash| - hash[:attrs] = attrs unless attrs.empty? - hash[:elements] = elements unless elements.empty? - hash[:text] = text if text + def to_h + {}.tap do |hash| + hash[:attrs] = attrs unless attrs.empty? + hash[:elements] = elements unless elements.empty? + hash[:text] = text if text + end end - end - alias_method :inspect, :to_h - alias_method :to_s, :to_h + alias inspect to_h + alias to_s to_h + end end diff --git a/lib/easy_sax/version.rb b/lib/easy_sax/version.rb index 80f51cc..a715059 100644 --- a/lib/easy_sax/version.rb +++ b/lib/easy_sax/version.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module EasySax - VERSION = "0.2.0" + VERSION = '0.2.1' end diff --git a/test/parser_test.rb b/test/parser_test.rb index 83b2784..d377f40 100644 --- a/test/parser_test.rb +++ b/test/parser_test.rb @@ -1,8 +1,10 @@ +# frozen_string_literal: true + require 'test_helper' require 'pry' -class ParserTest < Minitest::Test - TEST_XML = %{ +module ParserTestHelper + TEST_XML = %( Foo @@ -37,7 +39,7 @@ class ParserTest < Minitest::Test - } + ) PROPERTY_1 = { attrs: { 'id' => '2' }, @@ -47,14 +49,14 @@ class ParserTest < Minitest::Test }, 'images' => [ { - attrs: { 'url' => 'http://test.com/1.jpg' }, + attrs: { 'url' => 'http://test.com/1.jpg' } }, { - attrs: { 'url' => 'http://test.com/2.jpg' }, - }, + attrs: { 'url' => 'http://test.com/2.jpg' } + } ] } - } + }.freeze PROPERTY_2 = { attrs: { 'id' => '3' }, @@ -64,14 +66,14 @@ class ParserTest < Minitest::Test }, 'images' => [ { - attrs: { 'url' => 'http://test.com/4.jpg' }, + attrs: { 
'url' => 'http://test.com/4.jpg' } }, { - attrs: { 'url' => 'http://test.com/5.jpg' }, - }, + attrs: { 'url' => 'http://test.com/5.jpg' } + } ] } - } + }.freeze PROPERTY_3 = { attrs: { 'id' => '4' }, @@ -81,14 +83,14 @@ class ParserTest < Minitest::Test }, 'images' => [ { - attrs: { 'url' => 'http://test.com/3.jpg' }, + attrs: { 'url' => 'http://test.com/3.jpg' } }, { - attrs: { 'url' => 'http://test.com/4.jpg' }, - }, + attrs: { 'url' => 'http://test.com/4.jpg' } + } ] } - } + }.freeze def test_that_it_has_a_version_number refute_nil ::EasySax::VERSION @@ -97,9 +99,8 @@ def test_that_it_has_a_version_number def test_target_element_with_no_parents agencies = [] new_parser.parse_each(:agency, - ignore: %w{agencies}, - arrays: %w{properties features images} - ) do |agency| + ignore: %w[agencies], + arrays: %w[properties features images]) do |agency| agencies << agency end @@ -127,9 +128,8 @@ def test_target_element_with_parents agencies = {} properties = {} new_parser.parse_each(:property, - ignore: %w{agencies properties}, - arrays: %w{features images} - ) do |property, agency| + ignore: %w[agencies properties], + arrays: %w[features images]) do |property, agency| property_id = property.attrs['id'].to_i properties[property_id] = property agencies[property_id] = agency @@ -162,14 +162,13 @@ def test_target_element_with_parents def test_target_element_with_child_arrays properties = {} new_parser.parse_each(:property, - arrays: [:images] - ) do |property| + arrays: [:images]) do |property| property_id = property.attrs['id'].to_i properties[property_id] = property['images'].map { |image| image.attrs['url'] } end [PROPERTY_1, PROPERTY_2, PROPERTY_3].each do |property| - urls = property[:elements]['images'].map {|hash| hash[:attrs]['url']} + urls = property[:elements]['images'].map { |hash| hash[:attrs]['url'] } id = property[:attrs]['id'].to_i assert_equal urls, properties[id] end @@ -188,22 +187,42 @@ def test_invalid_xml_throws_error def 
test_validates_param_options_should_be_arrays assert_raises ArgumentError do new_parser.parse_each(:property, - ignore: 'agencies properties', - arrays: %w{features images} - ) + ignore: 'agencies properties', + arrays: %w[features images]) end assert_raises ArgumentError do new_parser.parse_each(:property, - ignore: %w{agencies properties}, - arrays: 'features images' - ) + ignore: %w[agencies properties], + arrays: 'features images') end end +end + +class EasySaxParserTest < Minitest::Test + include ParserTestHelper private + def parser_class + EasySax::Parser + end + + def new_parser + parser_class.new(StringIO.new(TEST_XML)) + end +end + +class EasySaxOxParserTest < Minitest::Test + include ParserTestHelper + + private + + def parser_class + EasySax::OxParser + end + def new_parser - EasySax::Parser.new(StringIO.new(TEST_XML)) + parser_class.new(StringIO.new(TEST_XML)) end end diff --git a/test/simple_element_test.rb b/test/simple_element_test.rb index 42bd244..46e96fd 100644 --- a/test/simple_element_test.rb +++ b/test/simple_element_test.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'test_helper' class SimpleElementTest < Minitest::Test diff --git a/test/test_helper.rb b/test/test_helper.rb index aac4e80..82c6151 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,4 +1,6 @@ -$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) +# frozen_string_literal: true + +$LOAD_PATH.unshift File.expand_path('../lib', __dir__) require 'easy_sax' require 'minitest/autorun'