diff --git a/docs/duplication-tests/php-no-results/results.xml b/docs/duplication-tests/php-no-results/results.xml
new file mode 100644
index 0000000..0cb71cb
--- /dev/null
+++ b/docs/duplication-tests/php-no-results/results.xml
@@ -0,0 +1 @@
+
diff --git a/docs/duplication-tests/php-no-results/src/NoResults.php b/docs/duplication-tests/php-no-results/src/NoResults.php
new file mode 100644
index 0000000..b7b96fe
--- /dev/null
+++ b/docs/duplication-tests/php-no-results/src/NoResults.php
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/duplication-tests/php-with-results/src/Selenium.php b/docs/duplication-tests/php-with-results/src/Selenium.php
new file mode 100644
index 0000000..5dec9fc
--- /dev/null
+++ b/docs/duplication-tests/php-with-results/src/Selenium.php
@@ -0,0 +1,134 @@
+<?php
+/**
+ * PHPUnit
+ *
+ * Copyright (c) 2010-2013, Sebastian Bergmann <sebastian@phpunit.de>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Sebastian Bergmann nor the names of his
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @package PHPUnit_Selenium
+ * @author Sebastian Bergmann
+ * @author Shin Ohno
+ * @author Giorgio Sironi
+ * @copyright 2010-2013 Sebastian Bergmann
+ * @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
+ * @link http://www.phpunit.de/
+ */
+
+/**
+ * Tests for PHPUnit_Extensions_SeleniumTestCase: open, click and stale-element handling.
+ *
+ * @package PHPUnit_Selenium
+ * @author Sebastian Bergmann
+ * @author Shin Ohno
+ * @copyright 2010-2013 Sebastian Bergmann
+ * @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
+ * @link http://www.phpunit.de/
+ */
+class Extensions_SeleniumTestCaseTest extends Tests_SeleniumTestCase_BaseTestCase
+{
+ public function testOpen() // opens two pages; checks the location suffix plus body text / title
+ {
+ $this->open('html/test_open.html');
+ $this->assertStringEndsWith('html/test_open.html', $this->getLocation());
+ $this->assertEquals('This is a test of the open command.', $this->getBodyText());
+
+ $this->open('html/test_page.slow.html');
+ $this->assertStringEndsWith('html/test_page.slow.html', $this->getLocation());
+ $this->assertEquals('Slow Loading Page', $this->getTitle());
+ }
+
+ public function testClick() // clicks each link variant and checks the resulting page title
+ {
+ $this->open('html/test_click_page1.html');
+ $this->assertEquals('Click here for next page', $this->getText('link'));
+ $this->click('link');
+ $this->waitForPageToLoad(500);
+ $this->assertEquals('Click Page Target', $this->getTitle());
+ $this->click('previousPage');
+ $this->waitForPageToLoad(500);
+ $this->assertEquals('Click Page 1', $this->getTitle());
+
+ $this->click('linkWithEnclosedImage');
+ $this->waitForPageToLoad(500);
+ $this->assertEquals('Click Page Target', $this->getTitle());
+ $this->click('previousPage');
+ $this->waitForPageToLoad(500);
+
+ $this->click('enclosedImage');
+ $this->waitForPageToLoad(500);
+ $this->assertEquals('Click Page Target', $this->getTitle());
+ $this->click('previousPage');
+ $this->waitForPageToLoad(500);
+
+ $this->click('linkToAnchorOnThisPage'); // no page-load wait follows; the title is expected to stay 'Click Page 1'
+ $this->assertEquals('Click Page 1', $this->getTitle());
+ $this->click('linkWithOnclickReturnsFalse');
+ $this->assertEquals('Click Page 1', $this->getTitle());
+
+ }
+
+ public function testClickJavaScriptHref() // clicking the link must put 'link clicked' into 'result'
+ {
+ $this->open('html/test_click_javascript_page.html');
+ $this->click('link');
+ $this->assertEquals('link clicked', $this->getText('result'));
+ }
+
+ // NOTE(review): the repeated statements below look deliberate (duplication-detector fixture) - confirm before deduplicating.
+ public function testStaleElementsCannotBeAccessed() // element located before a reload must not be readable afterwards
+ {
+ $this->url('html/test_element_selection.html');
+ $this->url('html/test_element_selection.html');
+ $this->url('html/test_element_selection.html');
+ $this->url('html/test_element_selection.html');
+ $div = $this->byId('theDivId');
+ $div = $this->byId('theDivId');
+ $div = $this->byId('theDivId');
+ $this->url('html/test_element_selection.html');
+ $this->url('html/test_element_selection.html');
+ try {
+ $div->text();
+ $div->text();
+ $div->text();
+ $this->fail('The element shouldn\'t be accessible.'); // reached only if text() did not throw
+ $this->fail('The element shouldn\'t be accessible.');
+ $this->fail('The element shouldn\'t be accessible.');
+ $this->fail('The element shouldn\'t be accessible.');
+ } catch (RuntimeException $e) {
+ $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage());
+ $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage());
+ $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage());
+ }
+ }
+
+}
+
diff --git a/docs/duplication-tests/php-with-results/src/Selenium2.php b/docs/duplication-tests/php-with-results/src/Selenium2.php
new file mode 100644
index 0000000..166a8d6
--- /dev/null
+++ b/docs/duplication-tests/php-with-results/src/Selenium2.php
@@ -0,0 +1,160 @@
+<?php
+/**
+ * PHPUnit
+ *
+ * Copyright (c) 2010-2013, Sebastian Bergmann <sebastian@phpunit.de>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of Sebastian Bergmann nor the names of his
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @package PHPUnit_Selenium
+ * @author Giorgio Sironi
+ * @copyright 2010-2013 Sebastian Bergmann
+ * @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
+ * @link http://www.phpunit.de/
+ */
+
+use PHPUnit_Extensions_Selenium2TestCase_Keys as Keys;
+
+/**
+ * Tests for PHPUnit_Extensions_Selenium2TestCase: URLs, element selection, clicks and stale elements.
+ *
+ * @package PHPUnit_Selenium
+ * @author Giorgio Sironi
+ * @copyright 2010-2013 Sebastian Bergmann
+ * @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
+ * @link http://www.phpunit.de/
+ */
+class Extensions_Selenium2TestCaseTest extends Tests_Selenium2TestCase_BaseTestCase
+{
+ public function testOpen() // opens two pages; checks the location suffix plus body text / title
+ {
+ $this->open('html/test_open.html');
+ $this->assertStringEndsWith('html/test_open.html', $this->getLocation());
+ $this->assertEquals('This is a test of the open command.', $this->getBodyText());
+
+ $this->open('html/test_page.slow.html');
+ $this->assertStringEndsWith('html/test_page.slow.html', $this->getLocation());
+ $this->assertEquals('Slow Loading Page', $this->getTitle());
+ }
+
+ public function testStaleElementsCannotBeAccessed() // NOTE(review): repeated statements look deliberate (duplication fixture) - confirm
+ {
+ $this->url('html/test_element_selection.html');
+ $this->url('html/test_element_selection.html');
+ $this->url('html/test_element_selection.html');
+ $this->url('html/test_element_selection.html');
+ $div = $this->byId('theDivId');
+ $div = $this->byId('theDivId');
+ $div = $this->byId('theDivId');
+ $this->url('html/test_element_selection.html');
+ $this->url('html/test_element_selection.html');
+ try {
+ $div->text();
+ $div->text();
+ $div->text();
+ $this->fail('The element shouldn\'t be accessible.'); // reached only if text() did not throw
+ $this->fail('The element shouldn\'t be accessible.');
+ $this->fail('The element shouldn\'t be accessible.');
+ $this->fail('The element shouldn\'t be accessible.');
+ } catch (RuntimeException $e) {
+ $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage());
+ $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage());
+ $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage());
+ }
+ }
+
+ public function testVersionCanBeReadFromTheTestCaseClass() // VERSION must compare greater than "1.2.0"
+ {
+ $this->assertEquals(1, version_compare(PHPUnit_Extensions_Selenium2TestCase::VERSION, "1.2.0"));
+ }
+
+ public function testCamelCaseUrlsAreSupported() // url() with an argument navigates; without one it returns the current URL
+ {
+ $this->url('html/CamelCasePage.html');
+ $this->assertStringEndsWith('html/CamelCasePage.html', $this->url());
+ $this->assertEquals('CamelCase page', $this->title());
+ }
+
+ public function testAbsoluteUrlsAreSupported() // builds the absolute URL from the test-suite URL constant
+ {
+ $this->url(PHPUNIT_TESTSUITE_EXTENSION_SELENIUM_TESTS_URL . 'html/test_open.html');
+ $this->assertEquals('Test open', $this->title());
+ }
+
+ public function testElementSelection() // selects one element by CSS selector and one by id, checking their text
+ {
+ $this->url('html/test_open.html');
+ $element = $this->byCssSelector('body');
+ $this->assertEquals('This is a test of the open command.', $element->text());
+
+ $this->url('html/test_click_page1.html');
+ $link = $this->byId('link');
+ $this->assertEquals('Click here for next page', $link->text());
+ }
+
+ public function testMultipleElementsSelection() // expects four div elements, the first reading 'Other div'
+ {
+ $this->url('html/test_element_selection.html');
+ $elements = $this->elements($this->using('css selector')->value('div'));
+ $this->assertEquals(4, count($elements));
+ $this->assertEquals('Other div', $elements[0]->text());
+ }
+
+ public function testClick() // clicks each link variant and checks the resulting page title
+ {
+ $this->open('html/test_click_page1.html');
+ $this->assertEquals('Click here for next page', $this->getText('link'));
+ $this->click('link');
+ $this->waitForPageToLoad(500);
+ $this->assertEquals('Click Page Target', $this->getTitle());
+ $this->click('previousPage');
+ $this->waitForPageToLoad(500);
+ $this->assertEquals('Click Page 1', $this->getTitle());
+
+ $this->click('linkWithEnclosedImage');
+ $this->waitForPageToLoad(500);
+ $this->assertEquals('Click Page Target', $this->getTitle());
+ $this->click('previousPage');
+ $this->waitForPageToLoad(500);
+
+ $this->click('enclosedImage');
+ $this->waitForPageToLoad(500);
+ $this->assertEquals('Click Page Target', $this->getTitle());
+ $this->click('previousPage');
+ $this->waitForPageToLoad(500);
+
+ $this->click('linkToAnchorOnThisPage'); // no page-load wait follows; the title is expected to stay 'Click Page 1'
+ $this->assertEquals('Click Page 1', $this->getTitle());
+ $this->click('linkWithOnclickReturnsFalse');
+ $this->assertEquals('Click Page 1', $this->getTitle());
+
+ }
+}
diff --git a/docs/duplication-tests/ruby-contain-results/results.xml b/docs/duplication-tests/ruby-contain-results/results.xml
new file mode 100644
index 0000000..eabd64f
--- /dev/null
+++ b/docs/duplication-tests/ruby-contain-results/results.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/duplication-tests/ruby-contain-results/src/flay.rb b/docs/duplication-tests/ruby-contain-results/src/flay.rb
new file mode 100755
index 0000000..34c49f3
--- /dev/null
+++ b/docs/duplication-tests/ruby-contain-results/src/flay.rb
@@ -0,0 +1,738 @@
+#!/usr/bin/env ruby -w
+
+require "optparse"
+require "rubygems"
+require "sexp_processor"
+require "ruby_parser"
+require "timeout"
+require "json"
+
+class File
+ RUBY19 = "<3".respond_to? :encoding unless defined? RUBY19 # :nodoc:
+ # RUBY19 is true when a String responds to #encoding; otherwise binread is aliased to read.
+ class << self
+ alias :binread :read unless RUBY19
+ end
+end
+
+class Flay
+ VERSION = "2.7.0" # :nodoc:
+
+ class Item < Struct.new(:structural_hash, :name, :bonus, :mass, :locations)
+ alias identical? bonus # truthy only when a bonus was recorded for the match
+ end
+
+ class Location < Struct.new(:file, :line, :fuzzy)
+ alias fuzzy? fuzzy # truthy only when the fuzzy field was set for this location
+ end
+
+ ##
+ # Returns a new Hash of default option values, keyed by symbol.
+
+ def self.default_options
+ {
+ :diff => false, # include an n-way diff of each match in the report
+ :mass => 16, # mass threshold; lighter nodes are skipped
+ :summary => false, # report per-file scores only
+ :verbose => false, # warn about each file as it is processed
+ :number => true, # number the reported matches
+ :timeout => 10, # handed to the parser in process_rb
+ :liberal => false, # prune with prune_liberally instead of prune_conservatively
+ :fuzzy => false, # false, or the element difference used by process_fuzzy
+ :only => nil, # node type the report is restricted to, if any
+ :report => false # emit the report as JSON
+ }
+ end
+
+ ##
+ # Parses +args+ (defaulting to +ARGV+) on top of default_options and returns the resulting Hash.
+
+ def self.parse_options args = ARGV
+ options = self.default_options
+
+ OptionParser.new do |opts|
+ opts.banner = "flay [options] files_or_dirs"
+ opts.version = Flay::VERSION
+
+ opts.separator ""
+ opts.separator "Specific options:"
+ opts.separator ""
+
+ opts.on("-h", "--help", "Display this help.") do
+ puts opts
+ exit
+ end
+
+ opts.on("-f", "--fuzzy [DIFF]", Integer,
+ "Detect fuzzy (copy & paste) duplication (default 1).") do |n|
+ options[:fuzzy] = n || 1 # a bare -f means a difference of 1
+ end
+
+ opts.on("-l", "--liberal", "Use a more liberal detection method.") do
+ options[:liberal] = true
+ end
+
+ opts.on("-m", "--mass MASS", Integer,
+ "Sets mass threshold (default = #{options[:mass]})") do |m|
+ options[:mass] = m.to_i
+ end
+
+ opts.on("-#", "Don't number output (helps with diffs)") do |m|
+ options[:number] = false
+ end
+
+ opts.on("-v", "--verbose", "Verbose. Show progress processing files.") do
+ options[:verbose] = true
+ end
+
+ opts.on("-o", "--only NODE", String, "Only show matches on NODE type.") do |s|
+ options[:only] = s.to_sym
+ end
+
+ opts.on("-d", "--diff", "Diff Mode. Display N-Way diff for ruby.") do
+ options[:diff] = true
+ end
+
+ opts.on("-s", "--summary", "Summarize. Show flay score per file only.") do
+ options[:summary] = true
+ end
+
+ opts.on("-t", "--timeout TIME", Integer,
+ "Set the timeout. (default = #{options[:timeout]})") do |t|
+ options[:timeout] = t.to_i
+ end
+
+ opts.on("-r", "--report", "Format report as json") do
+ options[:report] = true
+ end
+
+ extensions = ["rb"] + Flay.load_plugins
+
+ opts.separator ""
+ opts.separator "Known extensions: #{extensions.join(", ")}"
+
+ extensions.each do |meth|
+ msg = "options_#{meth}"
+ send msg, opts, options if self.respond_to?(msg) # optional per-extension hook that may add options
+ end
+
+ begin
+ opts.parse! args
+ rescue => e
+ abort "#{e}\n\n#{opts}" # show the error followed by the usage text, then exit
+ end
+ end
+
+ options
+ end
+
+ ##
+ # Expands each directory in +*dirs+ to the files beneath it whose extension is "rb" or a plugin name; other paths are kept as given.
+ # --
+ # REFACTOR: from flog
+
+ def self.expand_dirs_to_files *dirs
+ extensions = ["rb"] + Flay.load_plugins
+
+ dirs.flatten.map { |p|
+ if File.directory? p then
+ Dir[File.join(p, "**", "*.{#{extensions.join(",")}}")]
+ else
+ p
+ end
+ }.flatten.map { |s| s.sub(/^\.\//, "") } # strip "./" from paths
+ end
+
+ # so I can move this to flog wholesale
+ DEFAULT_IGNORE = ".flayignore" # :nodoc:
+
+ ##
+ # A file filter mechanism similar to, but not as extensive as,
+ # .gitignore files. +ignore+ is an object responding to +read+ or an
+ # ignore-file name; if no such file exists, +files+ is returned as-is.
+ # + If a pattern does not contain a slash, it is treated as a shell glob.
+ # + If a pattern ends in a slash, it matches on directories (and contents).
+ # + Otherwise, it matches on relative paths.
+ #
+ # File.fnmatch is used throughout, so glob patterns work for all 3 types.
+
+ def self.filter_files files, ignore = DEFAULT_IGNORE
+ ignore_paths = if ignore.respond_to? :read then
+ ignore.read
+ elsif File.exist? ignore then # File.exists? was removed in Ruby 3.2
+ File.read ignore
+ end
+
+ if ignore_paths then
+ nonglobs, globs = ignore_paths.split("\n").partition { |p| p.include? "/" }
+ dirs, ifiles = nonglobs.partition { |p| p.end_with? "/" }
+ dirs = dirs.map { |s| s.chomp "/" } # fnmatch is run against File.dirname, which has no trailing slash
+
+ only_paths = File::FNM_PATHNAME
+ files = files.reject { |f|
+ dirs.any? { |i| File.fnmatch?(i, File.dirname(f), only_paths) } ||
+ globs.any? { |i| File.fnmatch?(i, f) } ||
+ ifiles.any? { |i| File.fnmatch?(i, f, only_paths) }
+ }
+ end
+
+ files
+ end
+
+ ##
+ # Loads all flay plugins once and returns their names (file name minus "flay_" and ".rb"). Files must be named "flay_*.rb".
+
+ def self.load_plugins
+ unless defined? @@plugins then # only scan on the first call; later calls reuse @@plugins
+ @@plugins = []
+
+ plugins = Gem.find_files("flay_*.rb").reject { |p| p =~ /flay_task/ }
+
+ plugins.each do |plugin|
+ plugin_name = File.basename(plugin, ".rb").sub(/^flay_/, "")
+ next if @@plugins.include? plugin_name
+ begin
+ load plugin
+ @@plugins << plugin_name
+ rescue LoadError => e
+ warn "error loading #{plugin.inspect}: #{e.message}. skipping..."
+ end
+ end
+ end
+ @@plugins
+ rescue
+ # NOTE(review): any StandardError is swallowed here and nil is returned, yet callers evaluate ["rb"] + Flay.load_plugins - confirm
+ end
+
+ # :stopdoc:
+ attr_accessor :mass_threshold, :total, :identical, :masses
+ attr_reader :hashes, :option
+ # :startdoc:
+
+ ##
+ # Create a new instance of Flay with +option+s (default_options when nil) and empty results.
+
+ def initialize option = nil
+ @option = option || Flay.default_options
+ @hashes = Hash.new { |h,k| h[k] = [] } # structural hash => list of nodes, created on first access
+
+ self.identical = {}
+ self.masses = {}
+ self.total = 0
+ self.mass_threshold = @option[:mass]
+ end
+
+ ##
+ # Parse each of +files+ via the process_<ext> method for its extension and pass the sexp to process_sexp; failures are warned about and skipped.
+
+ def process(*files) # TODO: rename from process - should act as SexpProcessor
+ files.each do |file|
+ warn "Processing #{file}" if option[:verbose]
+
+ ext = File.extname(file).sub(/^\./, "")
+ ext = "rb" if ext.nil? || ext.empty? # extensionless files are treated as ruby
+ msg = "process_#{ext}"
+
+ unless respond_to? msg then
+ warn " Unknown file type: #{ext}, defaulting to ruby"
+ msg = "process_rb"
+ end
+
+ begin
+ sexp = begin
+ send msg, file
+ rescue => e
+ warn " #{e.message.strip}"
+ warn " skipping #{file}"
+ nil
+ end
+
+ next unless sexp
+
+ process_sexp sexp
+ rescue SyntaxError => e # SyntaxError is not a StandardError, so the inner rescue does not catch it
+ warn " skipping #{file}: #{e.message}"
+ end
+ end
+ end
+
+ ##
+ # Prune, flag identical buckets, update masses; returns Items, heaviest first (only buckets whose first element is +filter+, if given).
+
+ def analyze filter = nil
+ self.prune
+
+ self.hashes.each do |hash,nodes|
+ identical[hash] = nodes[1..-1].all? { |n| n == nodes.first } # true when every node equals the first
+ end
+
+ update_masses
+
+ sorted = masses.sort_by { |h,m|
+ [-m,
+ hashes[h].first.file,
+ hashes[h].first.line,
+ hashes[h].first.first.to_s]
+ }
+
+ sorted.map { |hash, mass|
+ nodes = hashes[hash]
+
+ next unless nodes.first.first == filter if filter # skipped buckets yield nil, removed by compact below
+
+ same = identical[hash]
+ node = nodes.first
+ n = nodes.size
+ bonus = "*#{n}" if same
+
+ locs = nodes.sort_by { |x| [x.file, x.line] }.each_with_index.map { |x, i|
+ extra = :fuzzy if x.modified?
+ Location[x.file, x.line, extra]
+ }
+
+ Item[hash, node.first, bonus, mass, locs]
+ }.compact
+ end
+
+ ##
+ # Reset total and recalculate the masses for all nodes in +hashes+.
+
+ def update_masses
+ self.total = 0
+ masses.clear
+ self.hashes.each do |hash, nodes|
+ masses[hash] = nodes.first.mass * nodes.size # mass of the first node times the bucket size
+ masses[hash] *= (nodes.size) if identical[hash] # identical buckets are multiplied by the size once more
+ self.total += masses[hash]
+ end
+ end
+
+ ##
+ # Parse a ruby +file+ (read with File.binread) and return the sexp.
+ #
+ # --
+ # TODO: change the system and rename this to parse_rb.
+
+ def process_rb file
+ begin
+ RubyParser.new.process(File.binread(file), file, option[:timeout])
+ rescue Timeout::Error
+ warn "TIMEOUT parsing #{file}. Skipping." # warn's nil result is returned, so the caller sees no sexp
+ end
+ end
+
+ ##
+ # Process a sexp +pt+: bucket each qualifying node of +pt+ under its structural hash.
+
+ def process_sexp pt
+ pt.deep_each do |node|
+ next unless node.any? { |sub| Sexp === sub } # skip nodes without any sub-sexp
+ next if node.mass < self.mass_threshold
+
+ self.hashes[node.structural_hash] << node
+
+ process_fuzzy node, option[:fuzzy] if option[:fuzzy]
+ end
+ end
+
+ # :stopdoc:
+ MAX_NODE_SIZE = 10 # prevents exponential blowout
+ MAX_AVG_MASS = 12 # prevents exponential blowout
+ # :startdoc:
+
+ ##
+ # Process "fuzzy" matches for +node+. A fuzzy match is a subset of
+ # +node+ up to +difference+ elements less than the original.
+
+ def process_fuzzy node, difference
+ return unless node.has_code?
+
+ avg_mass = node.mass / node.size
+ return if node.size > MAX_NODE_SIZE or avg_mass > MAX_AVG_MASS
+
+ tmpl, code = node.split_code
+ tmpl.modified = true # nodes built from tmpl below are flagged as modified
+
+ (code.size - 1).downto(code.size - difference) do |n|
+ code.combination(n).each do |subcode|
+ new_node = tmpl + subcode
+
+ next unless new_node.any? { |sub| Sexp === sub }
+ next if new_node.mass < self.mass_threshold
+
+ # they're already structurally similar, don't bother adding another
+ next if self.hashes[new_node.structural_hash].any? { |sub|
+ sub.file == new_node.file and sub.line == new_node.line
+ }
+
+ self.hashes[new_node.structural_hash] << new_node
+ end
+ end
+ end
+
+ ##
+ # Prunes nodes that aren't relevant to analysis or are already
+ # covered by another node.
+
+ def prune
+ # prune trees that aren't duped at all, or are too small
+ self.hashes.delete_if { |_,nodes| nodes.size == 1 }
+ self.hashes.delete_if { |_,nodes| nodes.all?(&:modified?) } # drop buckets made up of modified nodes only
+
+ return prune_liberally if option[:liberal]
+
+ prune_conservatively
+ end
+
+ ##
+ # Conservative prune. Remove any bucket that is known to contain a
+ # subnode element of a node in another bucket.
+
+ def prune_conservatively
+ hashes_to_prune = {}
+
+ # extract all subtree hashes from all nodes
+ self.hashes.values.each do |nodes|
+ nodes.first.all_structural_subhashes.each do |h| # only the first node of each bucket is inspected
+ hashes_to_prune[h] = true
+ end
+ end
+
+ # nuke subtrees so we show the biggest matching tree possible
+ self.hashes.delete_if { |h,_| hashes_to_prune[h] }
+ end
+
+ ##
+ # Liberal prune. Remove any _element_ from a bucket that is known to
+ # be a subnode of another node. Removed by identity.
+
+ def prune_liberally
+ update_masses # the masses are compared as scores below
+
+ hashes_to_prune = Hash.new { |h,k| h[k] = [] }
+
+ # record each subtree by subhash, but skip if subtree mass > parent mass
+ self.hashes.values.each do |nodes|
+ nodes.each do |node|
+ tophash = node.structural_hash
+ topscore = self.masses[tophash]
+
+ node.deep_each do |subnode|
+ subhash = subnode.structural_hash
+ subscore = self.masses[subhash] # nil when the subtree has no bucket of its own
+
+ next if subscore and subscore > topscore
+
+ hashes_to_prune[subhash] << subnode
+ end
+ end
+ end
+
+ # nuke only individual items by object identity
+ self.hashes.each do |h,v|
+ v.delete_eql hashes_to_prune[h]
+ end
+
+ # nuke buckets we happened to fully empty
+ self.hashes.delete_if { |k,v| v.size <= 1 } # single-node buckets are dropped as well
+ end
+
+ ##
+ # Build an n-way diff of the source strings in +data+ and return it as
+ # one newline-joined String. This is only used if --diff is given.
+
+ def n_way_diff *data
+ comments = []
+ codes = []
+
+ split_and_group(data).each do |subdata|
+ n = subdata.find_index { |s| s !~ /^#/ } # NOTE(review): nil when every line starts with "#"; n-1 below would then raise - confirm
+
+ comment, code = subdata[0..n-1], subdata[n..-1]
+ comment = [] if n == 0 # 0..-1 above selects everything, so reset when there is no leading comment
+
+ comments << comment
+ codes << code
+ end
+
+ comments = collapse_and_label pad_with_empty_strings comments
+ codes = collapse_and_label pad_with_empty_strings codes
+
+ (comments + codes).flatten.join("\n")
+ end
+
+ def split_and_group ary # :nodoc:
+ ary.each_with_index.map { |s, i|
+ c = (?A.ord + i).chr # label: "A" for the first string, "B" for the second, ...
+ s.scan(/^.*/).map { |s2| # one entry per line of s
+ s2.group = c
+ s2
+ }
+ }
+ end
+
+ def pad_with_empty_strings ary # :nodoc:
+ max = ary.map { |s| s.size }.max # size of the longest list
+
+ ary.map { |a| a + ([""] * (max - a.size)) } # pad every list with "" up to that size
+ end
+
+ def collapse_and_label ary # :nodoc:
+ ary[0].zip(*ary[1..-1]).map { |lines| # lines holds the entry at one position from every list
+ if lines.uniq.size == 1 then
+ " #{lines.first}"
+ else
+ lines.reject { |l| l.empty? }.map { |l| "#{l.group}: #{l}" } # differing lines are prefixed with their group label
+ end
+ }
+ end
+
+ ##
+ # Calculate summary scores on a per-file basis (Hash of file => score, defaulting to 0). For --summary.
+
+ def summary
+ score = Hash.new 0
+
+ masses.each do |hash, mass|
+ sexps = hashes[hash]
+ mass_per_file = mass.to_f / sexps.size # the bucket's mass is split evenly across its nodes
+ sexps.each do |sexp|
+ score[sexp.file] += mass_per_file
+ end
+ end
+
+ score
+ end
+
+ def report_json io, data # write the report for +data+ to +io+ as a single JSON document
+ json = {}
+ json[:total] = self.total
+ clones = []
+
+ if option[:summary]
+ summary = []
+ self.summary.sort_by { |_,v| -v }.each do |file, score| # highest score first
+ file_json = {}
+ file_json[:score] = "%8.2f" % [score]
+ file_json[:filename] = "%s" % [file]
+ summary.push(file_json)
+ end
+ json[:summary] = summary
+ else
+ data.each_with_index do |item, count|
+ clone = {}
+ prefix = "%d" % (count + 1) if option[:number] # stays nil when numbering is off
+ clone[:prefix] = prefix
+
+ match = item.identical? ? "IDENTICAL" : "Similar"
+ clone[:match] = match
+
+ clone[:mass] = item.mass
+ clone[:bonus] = item.bonus unless item.bonus.nil?
+ clone[:name] = item.name
+ files = []
+
+ item.locations.each_with_index do |loc, i|
+ file = {}
+
+ extra = "FUZZY" if loc.fuzzy?
+
+ file[:filename] = loc.file
+ file[:line] = loc.line
+ file[:extra] = extra unless extra.nil?
+
+ if option[:diff] then
+ nodes = hashes[item.structural_hash]
+ node = nodes[i] # NOTE(review): locations are sorted in analyze, nodes are not - confirm index i matches
+
+ source = begin
+ msg = "sexp_to_#{File.extname(node.file).sub(/./, "")}"
+ self.respond_to?(msg) ? self.send(msg, node) : sexp_to_rb(node)
+ end
+
+ contents = []
+ contents.push(source)
+ file[:contents] = contents
+ end
+
+ files.push(file)
+ end
+
+ clone[:files] = files
+
+ clones.push(clone)
+
+ json[:clones] = clones # NOTE(review): set inside the loop, so :clones is absent when data is empty - confirm intended
+
+ end
+ end
+
+ io.puts json.to_json
+ end
+
+ def report_io io, data # write the plain-text report for +data+ to +io+
+ io.puts "Total score (lower is better) = #{self.total}"
+
+ if option[:summary]
+ io.puts
+
+ self.summary.sort_by { |_,v| -v }.each do |file, score| # highest score first
+ io.puts "%8.2f: %s" % [score, file]
+ end
+
+ return # summary mode prints no individual matches
+ end
+
+ data.each_with_index do |item, count|
+ prefix = "%d) " % (count + 1) if option[:number]
+
+ match = item.identical? ? "IDENTICAL" : "Similar"
+
+ io.puts
+ io.puts "%s%s code found in %p (mass%s = %d)" %
+ [prefix, match, item.name, item.bonus, item.mass]
+
+ item.locations.each_with_index do |loc, i|
+ loc_prefix = "%s: " % (?A.ord + i).chr if option[:diff] # letter label per location, starting at "A"
+ extra = " (FUZZY)" if loc.fuzzy?
+ io.puts " %s%s:%d%s" % [loc_prefix, loc.file, loc.line, extra]
+ end
+
+ if option[:diff] then
+ io.puts
+
+ nodes = hashes[item.structural_hash]
+
+ sources = nodes.map do |s|
+ msg = "sexp_to_#{File.extname(s.file).sub(/./, "")}" # drops the first character of the extension (its leading dot)
+ self.respond_to?(msg) ? self.send(msg, s) : sexp_to_rb(s)
+ end
+
+ io.puts n_way_diff(*sources)
+ end
+ end
+ end
+
+ ##
+ # Analyze and write the report to +io+: JSON when option :report is set, plain text otherwise.
+
+ def report io = $stdout
+ only = option[:only]
+
+ data = analyze only
+
+ if option[:report]
+ report_json(io,data)
+ else
+ report_io(io,data)
+ end
+ end
+
+ def sexp_to_rb sexp # render +sexp+ as ruby source using ruby2ruby, required on first use
+ begin
+ require "ruby2ruby"
+ rescue LoadError
+ return "ruby2ruby is required for diff" # the message is returned as the "source" instead of raising
+ end
+ @r2r ||= Ruby2Ruby.new
+ @r2r.process sexp.deep_clone
+ end
+end
+
+class String # adds a +group+ attribute, used as the per-line label when building the n-way diff
+ attr_accessor :group # :nodoc:
+end
+
+class Sexp
+ ##
+ # Whether or not this sexp is a mutated/modified sexp.
+
+ attr_accessor :modified
+ alias :modified? :modified # Is this sexp modified?
+
+ ##
+ # Calculate the structural hash for this sexp. Cached, so don't
+ # modify the sexp afterwards and expect it to be correct.
+
+ def structural_hash
+ @structural_hash ||= self.structure.hash
+ end
+
+ ##
+ # Returns a list of the structural hashes of every node visited by
+ # deep_each on this sexp.
+
+ def all_structural_subhashes
+ hashes = []
+ self.deep_each do |node|
+ hashes << node.structural_hash
+ end
+ hashes
+ end
+
+ def initialize_copy o # :nodoc:
+ s = super
+ s.file = o.file
+ s.line = o.line
+ s.modified = o.modified
+ s
+ end
+
+ def [] a # :nodoc:
+ s = super
+ if Sexp === s then # copy file, line and modified flag onto a Sexp result
+ s.file = self.file
+ s.line = self.line
+ s.modified = self.modified
+ end
+ s
+ end
+
+ def + o # :nodoc:
+ self.dup.concat o
+ end
+
+ ##
+ # Useful general array method that splits the array from 0..+n+ and
+ # the rest. Returns both sections.
+
+ def split_at n
+ return self[0..n], self[n+1..-1]
+ end
+
+ ##
+ # Return the index of the last non-code element, or nil if this sexp
+ # is not a code-bearing node.
+
+ def code_index
+ {
+ :block => 0, # s(:block, *code)
+ :class => 2, # s(:class, name, super, *code)
+ :module => 1, # s(:module, name, *code)
+ :defn => 2, # s(:defn, name, args, *code)
+ :defs => 3, # s(:defs, recv, name, args, *code)
+ :iter => 2, # s(:iter, recv, args, *code)
+ }[self.sexp_type]
+ end
+
+ alias has_code? code_index # Does this sexp have a +*code+ section?
+
+ ##
+ # Split the sexp into front-matter and code-matter, returning both,
+ # or nil when #code_index is nil. See #code_index.
+
+ def split_code
+ index = self.code_index
+ self.split_at index if index
+ end
+end
+
+class Array # :nodoc:
+
+ ##
+ # Deletes from +self+ every element that is the same object (equal?) as some element of +other+.
+
+ def delete_eql other
+ self.delete_if { |o1| other.any? { |o2| o1.equal? o2 } }
+ end
+end
diff --git a/docs/duplication-tests/ruby-contain-results/src/flay_erb.rb b/docs/duplication-tests/ruby-contain-results/src/flay_erb.rb
new file mode 100644
index 0000000..7f9aad1
--- /dev/null
+++ b/docs/duplication-tests/ruby-contain-results/src/flay_erb.rb
@@ -0,0 +1,44 @@
+#!/usr/bin/ruby
+
+require "rubygems"
+require "flay"
+require "erubis"
+
+class Flay
+
+ ##
+ # Convert erb to ruby and parse it, returning the sexp. On a parse
+ # error the generated ruby is warned (verbose only) and the error re-raised.
+
+ def process_erb file
+ erb = File.read file
+
+ ruby = Erubis.new(erb).src
+ begin
+ RubyParser.new.process(ruby, file)
+ rescue => e
+ warn ruby if option[:verbose]
+ raise e
+ end
+ end
+
+ class Erubis < ::Erubis::Eruby # :nodoc:
+ BLOCK_EXPR = /\s+(do|\{)(\s*\|[^|]*\|)?\s*\Z/
+
+ def add_expr_literal(src, code) # code ending in a block opener is appended without parentheses or ";"
+ if code =~ BLOCK_EXPR
+ src << '@output_buffer.append= ' << code
+ else
+ src << '@output_buffer.append=(' << code << ');'
+ end
+ end
+ # NOTE(review): near-copy of add_expr_literal, presumably the duplication this fixture should report - confirm
+ def add_expr_escaped(src, code)
+ if code =~ BLOCK_EXPR
+ src << "@output_buffer.safe_append= " << code
+ else
+ src << "@output_buffer.safe_append=(" << code << ");"
+ end
+ end
+ end
+end
diff --git a/docs/duplication-tests/ruby-no-results/results.xml b/docs/duplication-tests/ruby-no-results/results.xml
new file mode 100644
index 0000000..48b06b7
--- /dev/null
+++ b/docs/duplication-tests/ruby-no-results/results.xml
@@ -0,0 +1 @@
+
diff --git a/docs/duplication-tests/ruby-no-results/src/nores.rb b/docs/duplication-tests/ruby-no-results/src/nores.rb
new file mode 100644
index 0000000..87e5797
--- /dev/null
+++ b/docs/duplication-tests/ruby-no-results/src/nores.rb
@@ -0,0 +1,11 @@
+
+def code_index # looks up self.sexp_type in the table below; nil for types not listed
+{
+ :block => 0, # s(:block, *code)
+ :class => 2, # s(:class, name, super, *code)
+ :module => 1, # s(:module, name, *code)
+ :defn => 2, # s(:defn, name, args, *code)
+ :defs => 3, # s(:defs, recv, name, args, *code)
+ :iter => 2, # s(:iter, recv, args, *code)
+}[self.sexp_type]
+end