htmldiff/lib/htmldiff/diff_builder.rb at f9dceccfb910da440da823d3c89a5d92d2fb2bc6 · openupresources/htmldiff · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
require 'nokogiri'

module HTMLDiff
  # Main class for building the diff output between two strings. Other classes
  # find out where the differences actually are, then this class turns that into
  # HTML.
  class DiffBuilder
    # Word predicates, in priority order, for tags that are always wrapped
    # whole in their own ins/del element. A run of consecutive words answering
    # true to the same predicate is emitted as a single wrapped chunk.
    ATOMIC_TAG_PREDICATES = %i[
      closed_empty_tag?
      standalone_tag?
      iframe_tag?
      block_tag?
    ].freeze

    # @return [Array] the output fragments accumulated so far; joined by #build
    attr_reader :content

    # @param old_version the earlier text, handed to ListOfWords.new
    # @param new_version the later text, handed to ListOfWords.new
    # @param options [Hash] overrides merged on top of #default_options; the
    #   merged hash is also passed to both ListOfWords instances
    def initialize(old_version, new_version, options = {})
      @options = default_options.merge options
      @old_words = ListOfWords.new old_version, @options
      @new_words = ListOfWords.new new_version, @options
      @content = []
    end

    # @return [Hash] the option values used when the caller supplies none
    def default_options
      {
        block_tag_classes: [],
        compare_tag_attributes: false
      }
    end

    # @return the value of the :compare_tag_attributes option; it is forwarded
    #   to Operation#same_tag? by #replace
    def compare_tag_attributes?
      @options[:compare_tag_attributes]
    end

    # Runs every operation and returns the accumulated output as one string.
    #
    # The buffer is reset first so that calling this more than once does not
    # append a second copy of the diff after the first.
    #
    # @return [String]
    def build
      @content = []
      perform_operations
      content.join
    end

    # These operations are a list of things that changed between the two
    # versions, which now need to be turned into valid HTML that shows things
    # with ins and del tags.
    def operations
      HTMLDiff::MatchFinder.new(@old_words, @new_words).operations
    end

    # Applies each operation, in order, to the output buffer.
    def perform_operations
      operations.each { |op| perform_operation(op) }
    end

    # Dispatches to the public method named by operation.action, passing the
    # operation itself. public_send is used instead of send so that an
    # unexpected action name can never reach a private method (Kernel#system,
    # Kernel#exit, the private helpers of this class, ...).
    #
    # @param operation [HTMLDiff::Operation]
    def perform_operation(operation)
      public_send operation.action, operation
    end

    # This is for when a chunk of text has been replaced with a different bit.
    # We want to ignore tags that are the same e.g.
    # '<p>' replaced by
    # '<p class="highlight">'
    # will come back from the diff algorithm as a replacement (tags are treated
    # as words in their entirety), but we don't have any use for seeing this
    # represented visually.
    #
    # @param operation [HTMLDiff::Operation]
    def replace(operation)
      # Special case: a tag has been altered so that an attribute has been
      # added e.g. <p> becomes <p style="margin: 2px"> due to an editor button
      # press. For this, we just show the new version, otherwise it gets messy
      # trying to find the closing tag.
      if operation.same_tag?(compare_tag_attributes?)
        equal(operation)
      else
        delete(operation, 'diffmod')
        insert(operation, 'diffmod')
      end
    end

    # Emits the operation's new words wrapped in ins tags.
    #
    # @param operation [HTMLDiff::Operation]
    # @param tagclass [String] CSS class placed on the ins tags
    def insert(operation, tagclass = 'diffins')
      insert_tag('ins', tagclass, operation.new_words)
    end

    # Emits the operation's old words wrapped in del tags.
    #
    # @param operation [HTMLDiff::Operation]
    # @param tagclass [String] CSS class placed on the del tags
    def delete(operation, tagclass = 'diffdel')
      insert_tag('del', tagclass, operation.old_words)
    end

    # No difference between these parts of the text. No tags to insert, simply
    # copy the matching words from one of the versions.
    #
    # @param operation [HTMLDiff::Operation]
    def equal(operation)
      @content << operation.new_text
    end

    # This method encloses words within a specified tag (ins or del), and adds
    # this into @content, with a twist: if the words contain tags, it actually
    # creates multiple ins or del, so that they don't include any ins or del
    # tags that are not properly nested. This handles cases like
    # old: '<p>a</p>'
    # new: '<p>ab</p><p>c</p>'
    # diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
    # This still doesn't guarantee valid HTML (hint: think about diffing a text
    # containing ins or del tags), but handles correctly more cases than the
    # earlier version.
    #
    # P.S.: Spare a thought for people who write HTML browsers. They live in
    # this... every day.
    #
    # @param tagname [String] name of the wrapping element, e.g. 'ins' or 'del'
    # @param cssclass [String] CSS class placed on every wrapping element
    # @param words the word list to emit; it must respond to empty?, first,
    #   contains_unclosed_tag? and extract_consecutive_words!. It is consumed:
    #   the loop keeps extracting from it until it reports empty.
    def insert_tag(tagname, cssclass, words)
      wrapped = false

      until words.empty?
        first_word = words.first
        atomic = atomic_tag_predicate(first_word)

        if atomic
          # Handle these tags as single blocks: the whole run of words that
          # answer true to the same predicate goes into one wrapper.
          run = words.extract_consecutive_words! { |word| word.public_send(atomic) }
          @content << wrap_text_in_diff_tag(run.join, tagname, cssclass)
        elsif first_word.tag?
          # If this chunk of text contains unclosed tags, then wrapping it will
          # cause weirdness. This would be the case if we have e.g. a style
          # applied to a paragraph tag, which will change the opening tag, but
          # not the closing tag.
          if !wrapped && !words.contains_unclosed_tag?
            @content << diff_tag_start(tagname, cssclass)
            wrapped = true
          end
          plain_tags = words.extract_consecutive_words! { |word| plain_tag?(word) }
          # concat appends in place, matching the << used everywhere else,
          # instead of building a new array and rebinding @content.
          @content.concat(plain_tags)
        else
          text = words.extract_consecutive_words! do |word|
            word.standalone_tag? || !word.tag?
          end.join
          @content << wrap_text_in_diff_tag(text, tagname, cssclass) unless text.empty?
        end
      end

      # Close the outer wrapper opened in the tag branch above, if any.
      @content << diff_tag_end(tagname) if wrapped
    end

    # @return [String] text enclosed in an opening and closing diff tag
    def wrap_text_in_diff_tag(text, tagname, cssclass)
      diff_tag_start(tagname, cssclass) + text + diff_tag_end(tagname)
    end

    # @return [String] the opening tag, e.g. '<ins class="diffins">'
    def diff_tag_start(tagname, cssclass)
      %(<#{tagname} class="#{cssclass}">)
    end

    # @return [String] the closing tag, e.g. '</ins>'
    def diff_tag_end(tagname)
      %(</#{tagname}>)
    end

    private

    # Returns the first entry of ATOMIC_TAG_PREDICATES that the word answers
    # true to, or nil when it matches none of them.
    def atomic_tag_predicate(word)
      ATOMIC_TAG_PREDICATES.find { |predicate| word.public_send(predicate) }
    end

    # True for a tag that is copied to the output as-is rather than being
    # wrapped on its own (i.e. not standalone, iframe or closed-empty).
    def plain_tag?(word)
      word.tag? && !word.standalone_tag? && !word.iframe_tag? && !word.closed_empty_tag?
    end
  end
end