diff --git a/docs/duplication-tests/php-no-results/results.xml b/docs/duplication-tests/php-no-results/results.xml new file mode 100644 index 0000000..0cb71cb --- /dev/null +++ b/docs/duplication-tests/php-no-results/results.xml @@ -0,0 +1 @@ + diff --git a/docs/duplication-tests/php-no-results/src/NoResults.php b/docs/duplication-tests/php-no-results/src/NoResults.php new file mode 100644 index 0000000..b7b96fe --- /dev/null +++ b/docs/duplication-tests/php-no-results/src/NoResults.php @@ -0,0 +1,9 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/duplication-tests/php-with-results/src/Selenium.php b/docs/duplication-tests/php-with-results/src/Selenium.php new file mode 100644 index 0000000..5dec9fc --- /dev/null +++ b/docs/duplication-tests/php-with-results/src/Selenium.php @@ -0,0 +1,134 @@ +. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Sebastian Bergmann nor the names of his + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @package PHPUnit_Selenium + * @author Sebastian Bergmann + * @author Shin Ohno + * @author Giorgio Sironi + * @copyright 2010-2013 Sebastian Bergmann + * @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License + * @link http://www.phpunit.de/ + */ + +/** + * Tests for PHPUnit_Extensions_SeleniumTestCase. + * + * @package PHPUnit_Selenium + * @author Sebastian Bergmann + * @author Shin Ohno + * @copyright 2010-2013 Sebastian Bergmann + * @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License + * @link http://www.phpunit.de/ + */ +class Extensions_SeleniumTestCaseTest extends Tests_SeleniumTestCase_BaseTestCase +{ + public function testOpen() + { + $this->open('html/test_open.html'); + $this->assertStringEndsWith('html/test_open.html', $this->getLocation()); + $this->assertEquals('This is a test of the open command.', $this->getBodyText()); + + $this->open('html/test_page.slow.html'); + $this->assertStringEndsWith('html/test_page.slow.html', $this->getLocation()); + $this->assertEquals('Slow Loading Page', $this->getTitle()); + } + + public function testClick() + { + $this->open('html/test_click_page1.html'); + $this->assertEquals('Click here for next page', $this->getText('link')); + $this->click('link'); + $this->waitForPageToLoad(500); + $this->assertEquals('Click Page Target', $this->getTitle()); + $this->click('previousPage'); + $this->waitForPageToLoad(500); + $this->assertEquals('Click Page 1', $this->getTitle()); + + $this->click('linkWithEnclosedImage'); + $this->waitForPageToLoad(500); + $this->assertEquals('Click Page Target', $this->getTitle()); + $this->click('previousPage'); + $this->waitForPageToLoad(500); + + $this->click('enclosedImage'); + $this->waitForPageToLoad(500); + $this->assertEquals('Click Page Target', $this->getTitle()); + $this->click('previousPage'); + $this->waitForPageToLoad(500); + + $this->click('linkToAnchorOnThisPage'); + $this->assertEquals('Click Page 1', $this->getTitle()); + $this->click('linkWithOnclickReturnsFalse'); + $this->assertEquals('Click Page 1', $this->getTitle()); + + } + + public function testClickJavaScriptHref() + { + $this->open('html/test_click_javascript_page.html'); + $this->click('link'); + $this->assertEquals('link clicked', $this->getText('result')); + } + + + public function testStaleElementsCannotBeAccessed() + { + $this->url('html/test_element_selection.html'); + $this->url('html/test_element_selection.html'); + $this->url('html/test_element_selection.html'); + $this->url('html/test_element_selection.html'); + $div = $this->byId('theDivId'); + $div = $this->byId('theDivId'); + $div = $this->byId('theDivId'); + $this->url('html/test_element_selection.html'); + $this->url('html/test_element_selection.html'); + try { + $div->text(); + $div->text(); + $div->text(); + $this->fail('The element shouldn\'t be accessible.'); + $this->fail('The element shouldn\'t be accessible.'); + $this->fail('The element shouldn\'t be accessible.'); + $this->fail('The element shouldn\'t be accessible.'); + } catch (RuntimeException $e) { + $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage()); + $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage()); + $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage()); + } + } + +} + diff --git a/docs/duplication-tests/php-with-results/src/Selenium2.php b/docs/duplication-tests/php-with-results/src/Selenium2.php new file mode 100644 index 0000000..166a8d6 --- /dev/null +++ b/docs/duplication-tests/php-with-results/src/Selenium2.php @@ -0,0 +1,160 @@ +. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Sebastian Bergmann nor the names of his + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @package PHPUnit_Selenium + * @author Giorgio Sironi + * @copyright 2010-2013 Sebastian Bergmann + * @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License + * @link http://www.phpunit.de/ + */ + +use PHPUnit_Extensions_Selenium2TestCase_Keys as Keys; + +/** + * Tests for PHPUnit_Extensions_Selenium2TestCase. + * + * @package PHPUnit_Selenium + * @author Giorgio Sironi + * @copyright 2010-2013 Sebastian Bergmann + * @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License + * @link http://www.phpunit.de/ + */ +class Extensions_Selenium2TestCaseTest extends Tests_Selenium2TestCase_BaseTestCase +{ + public function testOpen() + { + $this->open('html/test_open.html'); + $this->assertStringEndsWith('html/test_open.html', $this->getLocation()); + $this->assertEquals('This is a test of the open command.', $this->getBodyText()); + + $this->open('html/test_page.slow.html'); + $this->assertStringEndsWith('html/test_page.slow.html', $this->getLocation()); + $this->assertEquals('Slow Loading Page', $this->getTitle()); + } + + public function testStaleElementsCannotBeAccessed() + { + $this->url('html/test_element_selection.html'); + $this->url('html/test_element_selection.html'); + $this->url('html/test_element_selection.html'); + $this->url('html/test_element_selection.html'); + $div = $this->byId('theDivId'); + $div = $this->byId('theDivId'); + $div = $this->byId('theDivId'); + $this->url('html/test_element_selection.html'); + $this->url('html/test_element_selection.html'); + try { + $div->text(); + $div->text(); + $div->text(); + $this->fail('The element shouldn\'t be accessible.'); + $this->fail('The element shouldn\'t be accessible.'); + $this->fail('The element shouldn\'t be accessible.'); + $this->fail('The element shouldn\'t be accessible.'); + } catch (RuntimeException $e) { + $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage()); + $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage()); + $this->assertContains('http://seleniumhq.org/exceptions/stale_element_reference.html', $e->getMessage()); + } + } + + public function testVersionCanBeReadFromTheTestCaseClass() + { + $this->assertEquals(1, version_compare(PHPUnit_Extensions_Selenium2TestCase::VERSION, "1.2.0")); + } + + public function testCamelCaseUrlsAreSupported() + { + $this->url('html/CamelCasePage.html'); + $this->assertStringEndsWith('html/CamelCasePage.html', $this->url()); + $this->assertEquals('CamelCase page', $this->title()); + } + + public function testAbsoluteUrlsAreSupported() + { + $this->url(PHPUNIT_TESTSUITE_EXTENSION_SELENIUM_TESTS_URL . 'html/test_open.html'); + $this->assertEquals('Test open', $this->title()); + } + + public function testElementSelection() + { + $this->url('html/test_open.html'); + $element = $this->byCssSelector('body'); + $this->assertEquals('This is a test of the open command.', $element->text()); + + $this->url('html/test_click_page1.html'); + $link = $this->byId('link'); + $this->assertEquals('Click here for next page', $link->text()); + } + + public function testMultipleElementsSelection() + { + $this->url('html/test_element_selection.html'); + $elements = $this->elements($this->using('css selector')->value('div')); + $this->assertEquals(4, count($elements)); + $this->assertEquals('Other div', $elements[0]->text()); + } + + public function testClick() + { + $this->open('html/test_click_page1.html'); + $this->assertEquals('Click here for next page', $this->getText('link')); + $this->click('link'); + $this->waitForPageToLoad(500); + $this->assertEquals('Click Page Target', $this->getTitle()); + $this->click('previousPage'); + $this->waitForPageToLoad(500); + $this->assertEquals('Click Page 1', $this->getTitle()); + + $this->click('linkWithEnclosedImage'); + $this->waitForPageToLoad(500); + $this->assertEquals('Click Page Target', $this->getTitle()); + $this->click('previousPage'); + $this->waitForPageToLoad(500); + + $this->click('enclosedImage'); + $this->waitForPageToLoad(500); + $this->assertEquals('Click Page Target', $this->getTitle()); + $this->click('previousPage'); + $this->waitForPageToLoad(500); + + $this->click('linkToAnchorOnThisPage'); + $this->assertEquals('Click Page 1', $this->getTitle()); + $this->click('linkWithOnclickReturnsFalse'); + $this->assertEquals('Click Page 1', $this->getTitle()); + + } +} diff --git a/docs/duplication-tests/ruby-contain-results/results.xml b/docs/duplication-tests/ruby-contain-results/results.xml new file mode 100644 index 0000000..eabd64f --- /dev/null +++ b/docs/duplication-tests/ruby-contain-results/results.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/duplication-tests/ruby-contain-results/src/flay.rb b/docs/duplication-tests/ruby-contain-results/src/flay.rb new file mode 100755 index 0000000..34c49f3 --- /dev/null +++ b/docs/duplication-tests/ruby-contain-results/src/flay.rb @@ -0,0 +1,738 @@ +#!/usr/bin/env ruby -w + +require "optparse" +require "rubygems" +require "sexp_processor" +require "ruby_parser" +require "timeout" +require "json" + +class File + RUBY19 = "<3".respond_to? :encoding unless defined? RUBY19 # :nodoc: + + class << self + alias :binread :read unless RUBY19 + end +end + +class Flay + VERSION = "2.7.0" # :nodoc: + + class Item < Struct.new(:structural_hash, :name, :bonus, :mass, :locations) + alias identical? bonus + end + + class Location < Struct.new(:file, :line, :fuzzy) + alias fuzzy? fuzzy + end + + ## + # Returns the default options. + + def self.default_options + { + :diff => false, + :mass => 16, + :summary => false, + :verbose => false, + :number => true, + :timeout => 10, + :liberal => false, + :fuzzy => false, + :only => nil, + :report => false + } + end + + ## + # Process options in +args+, defaulting to +ARGV+. + + def self.parse_options args = ARGV + options = self.default_options + + OptionParser.new do |opts| + opts.banner = "flay [options] files_or_dirs" + opts.version = Flay::VERSION + + opts.separator "" + opts.separator "Specific options:" + opts.separator "" + + opts.on("-h", "--help", "Display this help.") do + puts opts + exit + end + + opts.on("-f", "--fuzzy [DIFF]", Integer, + "Detect fuzzy (copy & paste) duplication (default 1).") do |n| + options[:fuzzy] = n || 1 + end + + opts.on("-l", "--liberal", "Use a more liberal detection method.") do + options[:liberal] = true + end + + opts.on("-m", "--mass MASS", Integer, + "Sets mass threshold (default = #{options[:mass]})") do |m| + options[:mass] = m.to_i + end + + opts.on("-#", "Don't number output (helps with diffs)") do |m| + options[:number] = false + end + + opts.on("-v", "--verbose", "Verbose. Show progress processing files.") do + options[:verbose] = true + end + + opts.on("-o", "--only NODE", String, "Only show matches on NODE type.") do |s| + options[:only] = s.to_sym + end + + opts.on("-d", "--diff", "Diff Mode. Display N-Way diff for ruby.") do + options[:diff] = true + end + + opts.on("-s", "--summary", "Summarize. Show flay score per file only.") do + options[:summary] = true + end + + opts.on("-t", "--timeout TIME", Integer, + "Set the timeout. (default = #{options[:timeout]})") do |t| + options[:timeout] = t.to_i + end + + opts.on("-r", "--report", "Format report as json") do + options[:report] = true + end + + extensions = ["rb"] + Flay.load_plugins + + opts.separator "" + opts.separator "Known extensions: #{extensions.join(", ")}" + + extensions.each do |meth| + msg = "options_#{meth}" + send msg, opts, options if self.respond_to?(msg) + end + + begin + opts.parse! args + rescue => e + abort "#{e}\n\n#{opts}" + end + end + + options + end + + ## + # Expands +*dirs+ to all files within that match ruby and rake extensions. + # -- + # REFACTOR: from flog + + def self.expand_dirs_to_files *dirs + extensions = ["rb"] + Flay.load_plugins + + dirs.flatten.map { |p| + if File.directory? p then + Dir[File.join(p, "**", "*.{#{extensions.join(",")}}")] + else + p + end + }.flatten.map { |s| s.sub(/^\.\//, "") } # strip "./" from paths + end + + # so I can move this to flog wholesale + DEFAULT_IGNORE = ".flayignore" # :nodoc: + + ## + # A file filter mechanism similar to, but not as extensive as, + # .gitignore files: + # + # + If a pattern does not contain a slash, it is treated as a shell glob. + # + If a pattern ends in a slash, it matches on directories (and contents). + # + Otherwise, it matches on relative paths. + # + # File.fnmatch is used throughout, so glob patterns work for all 3 types. + + def self.filter_files files, ignore = DEFAULT_IGNORE + ignore_paths = if ignore.respond_to? :read then + ignore.read + elsif File.exists? ignore then + File.read ignore + end + + if ignore_paths then + nonglobs, globs = ignore_paths.split("\n").partition { |p| p.include? "/" } + dirs, ifiles = nonglobs.partition { |p| p.end_with? "/" } + dirs = dirs.map { |s| s.chomp "/" } + + only_paths = File::FNM_PATHNAME + files = files.reject { |f| + dirs.any? { |i| File.fnmatch?(i, File.dirname(f), only_paths) } || + globs.any? { |i| File.fnmatch?(i, f) } || + ifiles.any? { |i| File.fnmatch?(i, f, only_paths) } + } + end + + files + end + + ## + # Loads all flay plugins. Files must be named "flay_*.rb". + + def self.load_plugins + unless defined? @@plugins then + @@plugins = [] + + plugins = Gem.find_files("flay_*.rb").reject { |p| p =~ /flay_task/ } + + plugins.each do |plugin| + plugin_name = File.basename(plugin, ".rb").sub(/^flay_/, "") + next if @@plugins.include? plugin_name + begin + load plugin + @@plugins << plugin_name + rescue LoadError => e + warn "error loading #{plugin.inspect}: #{e.message}. skipping..." + end + end + end + @@plugins + rescue + # ignore + end + + # :stopdoc: + attr_accessor :mass_threshold, :total, :identical, :masses + attr_reader :hashes, :option + # :startdoc: + + ## + # Create a new instance of Flay with +option+s. + + def initialize option = nil + @option = option || Flay.default_options + @hashes = Hash.new { |h,k| h[k] = [] } + + self.identical = {} + self.masses = {} + self.total = 0 + self.mass_threshold = @option[:mass] + end + + ## + # Process any number of files. + + def process(*files) # TODO: rename from process - should act as SexpProcessor + files.each do |file| + warn "Processing #{file}" if option[:verbose] + + ext = File.extname(file).sub(/^\./, "") + ext = "rb" if ext.nil? || ext.empty? + msg = "process_#{ext}" + + unless respond_to? msg then + warn " Unknown file type: #{ext}, defaulting to ruby" + msg = "process_rb" + end + + begin + sexp = begin + send msg, file + rescue => e + warn " #{e.message.strip}" + warn " skipping #{file}" + nil + end + + next unless sexp + + process_sexp sexp + rescue SyntaxError => e + warn " skipping #{file}: #{e.message}" + end + end + end + + ## + # Prune, find identical nodes, and update masses. + + def analyze filter = nil + self.prune + + self.hashes.each do |hash,nodes| + identical[hash] = nodes[1..-1].all? { |n| n == nodes.first } + end + + update_masses + + sorted = masses.sort_by { |h,m| + [-m, + hashes[h].first.file, + hashes[h].first.line, + hashes[h].first.first.to_s] + } + + sorted.map { |hash, mass| + nodes = hashes[hash] + + next unless nodes.first.first == filter if filter + + same = identical[hash] + node = nodes.first + n = nodes.size + bonus = "*#{n}" if same + + locs = nodes.sort_by { |x| [x.file, x.line] }.each_with_index.map { |x, i| + extra = :fuzzy if x.modified? + Location[x.file, x.line, extra] + } + + Item[hash, node.first, bonus, mass, locs] + }.compact + end + + ## + # Reset total and recalculate the masses for all nodes in +hashes+. + + def update_masses + self.total = 0 + masses.clear + self.hashes.each do |hash, nodes| + masses[hash] = nodes.first.mass * nodes.size + masses[hash] *= (nodes.size) if identical[hash] + self.total += masses[hash] + end + end + + ## + # Parse a ruby +file+ and return the sexp. + # + # -- + # TODO: change the system and rename this to parse_rb. + + def process_rb file + begin + RubyParser.new.process(File.binread(file), file, option[:timeout]) + rescue Timeout::Error + warn "TIMEOUT parsing #{file}. Skipping." + end + end + + ## + # Process a sexp +pt+. + + def process_sexp pt + pt.deep_each do |node| + next unless node.any? { |sub| Sexp === sub } + next if node.mass < self.mass_threshold + + self.hashes[node.structural_hash] << node + + process_fuzzy node, option[:fuzzy] if option[:fuzzy] + end + end + + # :stopdoc: + MAX_NODE_SIZE = 10 # prevents exponential blowout + MAX_AVG_MASS = 12 # prevents exponential blowout + # :startdoc: + + ## + # Process "fuzzy" matches for +node+. A fuzzy match is a subset of + # +node+ up to +difference+ elements less than the original. + + def process_fuzzy node, difference + return unless node.has_code? + + avg_mass = node.mass / node.size + return if node.size > MAX_NODE_SIZE or avg_mass > MAX_AVG_MASS + + tmpl, code = node.split_code + tmpl.modified = true + + (code.size - 1).downto(code.size - difference) do |n| + code.combination(n).each do |subcode| + new_node = tmpl + subcode + + next unless new_node.any? { |sub| Sexp === sub } + next if new_node.mass < self.mass_threshold + + # they're already structurally similar, don"t bother adding another + next if self.hashes[new_node.structural_hash].any? { |sub| + sub.file == new_node.file and sub.line == new_node.line + } + + self.hashes[new_node.structural_hash] << new_node + end + end + end + + ## + # Prunes nodes that aren't relevant to analysis or are already + # covered by another node. + + def prune + # prune trees that aren't duped at all, or are too small + self.hashes.delete_if { |_,nodes| nodes.size == 1 } + self.hashes.delete_if { |_,nodes| nodes.all?(&:modified?) } + + return prune_liberally if option[:liberal] + + prune_conservatively + end + + ## + # Conservative prune. Remove any bucket that is known to contain a + # subnode element of a node in another bucket. + + def prune_conservatively + hashes_to_prune = {} + + # extract all subtree hashes from all nodes + self.hashes.values.each do |nodes| + nodes.first.all_structural_subhashes.each do |h| + hashes_to_prune[h] = true + end + end + + # nuke subtrees so we show the biggest matching tree possible + self.hashes.delete_if { |h,_| hashes_to_prune[h] } + end + + ## + # Liberal prune. Remove any _element_ from a bucket that is known to + # be a subnode of another node. Removed by identity. + + def prune_liberally + update_masses + + hashes_to_prune = Hash.new { |h,k| h[k] = [] } + + # record each subtree by subhash, but skip if subtree mass > parent mass + self.hashes.values.each do |nodes| + nodes.each do |node| + tophash = node.structural_hash + topscore = self.masses[tophash] + + node.deep_each do |subnode| + subhash = subnode.structural_hash + subscore = self.masses[subhash] + + next if subscore and subscore > topscore + + hashes_to_prune[subhash] << subnode + end + end + end + + # nuke only individual items by object identity + self.hashes.each do |h,v| + v.delete_eql hashes_to_prune[h] + end + + # nuke buckets we happened to fully empty + self.hashes.delete_if { |k,v| v.size <= 1 } + end + + ## + # Output an n-way diff from +data+. This is only used if --diff is + # given. + + def n_way_diff *data + comments = [] + codes = [] + + split_and_group(data).each do |subdata| + n = subdata.find_index { |s| s !~ /^#/ } + + comment, code = subdata[0..n-1], subdata[n..-1] + comment = [] if n == 0 + + comments << comment + codes << code + end + + comments = collapse_and_label pad_with_empty_strings comments + codes = collapse_and_label pad_with_empty_strings codes + + (comments + codes).flatten.join("\n") + end + + def split_and_group ary # :nodoc: + ary.each_with_index.map { |s, i| + c = (?A.ord + i).chr + s.scan(/^.*/).map { |s2| + s2.group = c + s2 + } + } + end + + def pad_with_empty_strings ary # :nodoc: + max = ary.map { |s| s.size }.max + + ary.map { |a| a + ([""] * (max - a.size)) } + end + + def collapse_and_label ary # :nodoc: + ary[0].zip(*ary[1..-1]).map { |lines| + if lines.uniq.size == 1 then + " #{lines.first}" + else + lines.reject { |l| l.empty? }.map { |l| "#{l.group}: #{l}" } + end + } + end + + ## + # Calculate summary scores on a per-file basis. For --summary. + + def summary + score = Hash.new 0 + + masses.each do |hash, mass| + sexps = hashes[hash] + mass_per_file = mass.to_f / sexps.size + sexps.each do |sexp| + score[sexp.file] += mass_per_file + end + end + + score + end + + def report_json io, data + json = {} + json[:total] = self.total + clones = [] + + if option[:summary] + summary = [] + self.summary.sort_by { |_,v| -v }.each do |file, score| + file_json = {} + file_json[:score] = "%8.2f" % [score] + file_json[:filename] = "%s" % [file] + summary.push(file_json) + end + json[:summary] = summary + else + data.each_with_index do |item, count| + clone = {} + prefix = "%d" % (count + 1) if option[:number] + clone[:prefix] = prefix + + match = item.identical? ? "IDENTICAL" : "Similar" + clone[:match] = match + + clone[:mass] = item.mass + clone[:bonus] = item.bonus unless item.bonus.nil? + clone[:name] = item.name + files = [] + + item.locations.each_with_index do |loc, i| + file = {} + + extra = "FUZZY" if loc.fuzzy? + + file[:filename] = loc.file + file[:line] = loc.line + file[:extra] = extra unless extra.nil? + + if option[:diff] then + nodes = hashes[item.structural_hash] + node = nodes[i] + + source = begin + msg = "sexp_to_#{File.extname(node.file).sub(/./, "")}" + self.respond_to?(msg) ? self.send(msg, node) : sexp_to_rb(node) + end + + contents = [] + contents.push(source) + file[:contents] = contents + end + + files.push(file) + end + + clone[:files] = files + + clones.push(clone) + + json[:clones] = clones + + end + end + + io.puts json.to_json + end + + def report_io io, data + io.puts "Total score (lower is better) = #{self.total}" + + if option[:summary] + io.puts + + self.summary.sort_by { |_,v| -v }.each do |file, score| + io.puts "%8.2f: %s" % [score, file] + end + + return + end + + data.each_with_index do |item, count| + prefix = "%d) " % (count + 1) if option[:number] + + match = item.identical? ? "IDENTICAL" : "Similar" + + io.puts + io.puts "%s%s code found in %p (mass%s = %d)" % + [prefix, match, item.name, item.bonus, item.mass] + + item.locations.each_with_index do |loc, i| + loc_prefix = "%s: " % (?A.ord + i).chr if option[:diff] + extra = " (FUZZY)" if loc.fuzzy? + io.puts " %s%s:%d%s" % [loc_prefix, loc.file, loc.line, extra] + end + + if option[:diff] then + io.puts + + nodes = hashes[item.structural_hash] + + sources = nodes.map do |s| + msg = "sexp_to_#{File.extname(s.file).sub(/./, "")}" + self.respond_to?(msg) ? self.send(msg, s) : sexp_to_rb(s) + end + + io.puts n_way_diff(*sources) + end + end + end + + ## + # Output the report. Duh. + + def report io = $stdout + only = option[:only] + + data = analyze only + + if option[:report] + report_json(io,data) + else + report_io(io,data) + end + end + + def sexp_to_rb sexp + begin + require "ruby2ruby" + rescue LoadError + return "ruby2ruby is required for diff" + end + @r2r ||= Ruby2Ruby.new + @r2r.process sexp.deep_clone + end +end + +class String + attr_accessor :group # :nodoc: +end + +class Sexp + ## + # Whether or not this sexp is a mutated/modified sexp. + + attr_accessor :modified + alias :modified? :modified # Is this sexp modified? + + ## + # Calculate the structural hash for this sexp. Cached, so don't + # modify the sexp afterwards and expect it to be correct. + + def structural_hash + @structural_hash ||= self.structure.hash + end + + ## + # Returns a list of structural hashes for all nodes (and sub-nodes) + # of this sexp. + + def all_structural_subhashes + hashes = [] + self.deep_each do |node| + hashes << node.structural_hash + end + hashes + end + + def initialize_copy o # :nodoc: + s = super + s.file = o.file + s.line = o.line + s.modified = o.modified + s + end + + def [] a # :nodoc: + s = super + if Sexp === s then + s.file = self.file + s.line = self.line + s.modified = self.modified + end + s + end + + def + o # :nodoc: + self.dup.concat o + end + + ## + # Useful general array method that splits the array from 0..+n+ and + # the rest. Returns both sections. + + def split_at n + return self[0..n], self[n+1..-1] + end + + ## + # Return the index of the last non-code element, or nil if this sexp + # is not a code-bearing node. + + def code_index + { + :block => 0, # s(:block, *code) + :class => 2, # s(:class, name, super, *code) + :module => 1, # s(:module, name, *code) + :defn => 2, # s(:defn, name, args, *code) + :defs => 3, # s(:defs, recv, name, args, *code) + :iter => 2, # s(:iter, recv, args, *code) + }[self.sexp_type] + end + + alias has_code? code_index # Does this sexp have a +*code+ section? + + ## + # Split the sexp into front-matter and code-matter, returning both. + # See #code_index. + + def split_code + index = self.code_index + self.split_at index if index + end +end + +class Array # :nodoc: + + ## + # Delete anything in +self+ if they are identical to anything in +other+. + + def delete_eql other + self.delete_if { |o1| other.any? { |o2| o1.equal? o2 } } + end +end diff --git a/docs/duplication-tests/ruby-contain-results/src/flay_erb.rb b/docs/duplication-tests/ruby-contain-results/src/flay_erb.rb new file mode 100644 index 0000000..7f9aad1 --- /dev/null +++ b/docs/duplication-tests/ruby-contain-results/src/flay_erb.rb @@ -0,0 +1,44 @@ +#!/usr/bin/ruby + +require "rubygems" +require "flay" +require "erubis" + +class Flay + + ## + # Process erb and parse the result. Returns the sexp of the parsed + # ruby. + + def process_erb file + erb = File.read file + + ruby = Erubis.new(erb).src + begin + RubyParser.new.process(ruby, file) + rescue => e + warn ruby if option[:verbose] + raise e + end + end + + class Erubis < ::Erubis::Eruby # :nodoc: + BLOCK_EXPR = /\s+(do|\{)(\s*\|[^|]*\|)?\s*\Z/ + + def add_expr_literal(src, code) + if code =~ BLOCK_EXPR + src << '@output_buffer.append= ' << code + else + src << '@output_buffer.append=(' << code << ');' + end + end + + def add_expr_escaped(src, code) + if code =~ BLOCK_EXPR + src << "@output_buffer.safe_append= " << code + else + src << "@output_buffer.safe_append=(" << code << ");" + end + end + end +end diff --git a/docs/duplication-tests/ruby-no-results/results.xml b/docs/duplication-tests/ruby-no-results/results.xml new file mode 100644 index 0000000..48b06b7 --- /dev/null +++ b/docs/duplication-tests/ruby-no-results/results.xml @@ -0,0 +1 @@ + diff --git a/docs/duplication-tests/ruby-no-results/src/nores.rb b/docs/duplication-tests/ruby-no-results/src/nores.rb new file mode 100644 index 0000000..87e5797 --- /dev/null +++ b/docs/duplication-tests/ruby-no-results/src/nores.rb @@ -0,0 +1,11 @@ + +def code_index +{ + :block => 0, # s(:block, *code) + :class => 2, # s(:class, name, super, *code) + :module => 1, # s(:module, name, *code) + :defn => 2, # s(:defn, name, args, *code) + :defs => 3, # s(:defs, recv, name, args, *code) + :iter => 2, # s(:iter, recv, args, *code) +}[self.sexp_type] +end