1+ require "loofah"
2+
3+ module Shamu
4+ module Attributes
5+
6+ # Adds an HTML sanitation option to attributes. When present, string values
7+ # will be sanitized when the attribute is read.
8+ #
9+ # The raw unfiltered value is always available as `#{ attribute }_raw`.
10+ module HtmlSanitation
11+ extend ActiveSupport ::Concern
12+
# The standard HTML sanitation filter methods. Each symbol names an
# instance method of the form `#{ symbol }_html_sanitize`.
STANDARD_FILTER_METHODS = [
  # Don't allow any HTML.
  :none,
  # Allow very simple HTML. See {#simple_html_sanitize}.
  :simple,
  # Allow subset useful for body copy. See {#body_html_sanitize}.
  :body,
  # Allow a broad subset of HTML tags and attributes. See
  # {#safe_html_sanitize}.
  :safe,
  # Allow all HTML.
  :allow
].freeze

# Tags safe for simple text.
SIMPLE_TAGS = %w[ B I STRONG EM ].freeze

# Tags safe for body text.
BODY_TAGS = %w[
  B BR CODE DIV EM
  H2 H3 H4 H5 H6 HR
  I LI OL P PRE
  SPAN STRONG U UL
].freeze

# Tags that are not safe. The permit scrubbers drop these together with
# their content instead of keeping the children.
UNSAFE_TAGS = %w[ FORM SCRIPT IFRAME FRAME ].freeze
32+
33+ class_methods do
# (see Attributes.attribute)
# @param [Symbol,#call] html sanitation to apply when the attribute is
#   read. The sanitized reader is only defined when the `html` option is
#   present. Acceptable values are
#
#   - `:none` strip all HTML. Also used when `html: nil` is given.
#   - `:simple` simple formatting suitable for most places. See
#     {#simple_html_sanitize} for details.
#   - `:body` basic formatting for 'body' text. See
#     {#body_html_sanitize} for details.
#   - `:safe` a broad subset of HTML tags and attributes. See
#     {#safe_html_sanitize} for details.
#   - `:allow` permit any HTML tag.
#   - Any other symbol is assumed to be a method on the entity that will
#     be called to filter the html.
#   - `#call` anything that responds to `#call` that takes a single
#     argument of the raw string and returns the sanitized HTML.
def attribute( name, *args, **options, &block )
  # Bare `super` forwards every argument (and the block) unchanged.
  result = super

  define_html_sanitized_attribute_reader( name, options[ :html ] ) if options.key?( :html )

  result
end
52+
53+ private
54+
# Runs the inherited reader definition, then adds a companion
# `#{ name }_raw` reader to the class.
#
# The raw reader returns the `@#{ name }` instance variable when it is
# already defined; otherwise it stores and returns `fetch_#{ name }`.
#
# @param [Symbol] name of the attribute.
# @param [Symbol] as not used here; forwarded to the inherited
#   implementation by the bare `super`.
def define_attribute_reader( name, as: nil, ** )
  super

  # Point backtraces at the first line of the generated method below.
  raw_reader_line = __LINE__ + 2
  raw_reader = <<-RUBY
    def #{ name }_raw                                  # def attribute_raw
      return @#{ name } if defined? @#{ name }         #   return @attribute if defined? @attribute
      @#{ name } = fetch_#{ name }                     #   @attribute = fetch_attribute
    end                                                # end
  RUBY

  class_eval raw_reader, __FILE__, raw_reader_line
end
65+
# Defines the `#{ name }` reader so that it returns the raw value run
# through the resolved filter method. The filtered value is memoized in
# `@#{ name }_html_sanitized`.
#
# @param [Symbol] name of the attribute.
# @param [Symbol,#call,nil] method the filter to use. Falls back to
#   `:none` when nil or false.
def define_html_sanitized_attribute_reader( name, method )
  filter_method = resolve_html_filter_method( name, method || :none )
  cache         = "@#{ name }_html_sanitized"

  # Point backtraces at the first line of the generated method below.
  reader_line = __LINE__ + 2
  reader = <<-RUBY
    def #{ name }                                           # def attribute
      return #{ cache } if defined? #{ cache }              #   return @attribute_html_sanitized if defined? @attribute_html_sanitized
      #{ cache } = #{ filter_method }( #{ name }_raw )      #   @attribute_html_sanitized = simple_html_sanitize( attribute_raw )
    end                                                     # end
  RUBY

  class_eval reader, __FILE__, reader_line
end
77+
# Works out the name of the instance method that filters the attribute.
#
# - One of {STANDARD_FILTER_METHODS} maps to `#{ method }_html_sanitize`.
# - Any other Symbol is returned as is and is expected to name a method
#   on the entity.
# - Anything else is installed as an instance method named
#   `custom_#{ name }_html_sanitize`.
#
# @param [Symbol] name of the attribute.
# @param [Symbol,#call] method the requested filter.
# @return [String,Symbol] name of the method to call with the raw value.
def resolve_html_filter_method( name, method )
  return "#{ method }_html_sanitize" if STANDARD_FILTER_METHODS.include?( method )
  return method if method.is_a?( Symbol )

  filter_method = "custom_#{ name }_html_sanitize"

  if method.respond_to?( :call ) && !method.respond_to?( :to_proc )
    # A plain callable cannot be passed as a block, so invoke it
    # explicitly with the raw value.
    define_method( filter_method ) { |value| method.call( value ) }
  else
    # Procs and lambdas keep being used directly as the method body.
    define_method( filter_method, &method )
  end

  filter_method
end
89+ end
90+
91+ private
92+
# @!visibility public
#
# Strip all HTML from the value using {NoneScrubber}. Anything that is
# not a String is returned untouched.
#
# @param [String] value to sanitize.
# @return [String] the sanitized value.
def none_html_sanitize( value )
  if value.is_a?( String )
    scrubbed = Loofah.fragment( value ).scrub!( NoneScrubber.new )
    scrubbed.to_s
  else
    value
  end
end
104+
# @!visibility public
#
# Keep only the simplest formatting tags <B>, <I>, <STRONG> and <EM>
# (see {SIMPLE_TAGS}) using {SimpleScrubber}. Anything that is not a
# String is returned untouched.
#
# @param [String] value to sanitize.
# @return [String] the sanitized value.
def simple_html_sanitize( value )
  if value.is_a?( String )
    scrubbed = Loofah.fragment( value ).scrub!( SimpleScrubber.new )
    scrubbed.to_s
  else
    value
  end
end
116+
# @!visibility public
#
# Keep only the limited set of tags useful for body copy text (see
# {BODY_TAGS}) using {BodyScrubber}. Anything that is not a String is
# returned untouched.
#
# @param [String] value to sanitize.
# @return [String] the sanitized value.
def body_html_sanitize( value )
  if value.is_a?( String )
    scrubbed = Loofah.fragment( value ).scrub!( BodyScrubber.new )
    scrubbed.to_s
  else
    value
  end
end
129+
# @!visibility public
#
# Remove the unsafe tags (see {UNSAFE_TAGS}) together with their content
# and scrub the attributes of the remaining elements using
# {SafeScrubber}, then apply Loofah's built-in `:nofollow` scrubber
# (documented by Loofah as adding rel="nofollow" to links). Anything
# that is not a String is returned untouched.
#
# @param [String] value to sanitize.
# @return [String] the sanitized value.
def safe_html_sanitize( value )
  return value unless value.is_a?( String )

  # Loofah registers the built-in scrubber as `:nofollow`; an unknown
  # name makes `scrub!` raise Loofah::ScrubberNotFound.
  Loofah.fragment( value )
        .scrub!( SafeScrubber.new )
        .scrub!( :nofollow )
        .to_s
end
144+
# @!visibility public
#
# Leaves all HTML tags in place. The only processing applied is Loofah's
# built-in `:nofollow` scrubber (documented by Loofah as adding
# rel="nofollow" to links). Anything that is not a String is returned
# untouched.
#
# @param [String] value to sanitize.
# @return [String] the sanitized value.
def allow_html_sanitize( value )
  return value unless value.is_a?( String )

  # Loofah registers the built-in scrubber as `:nofollow`; an unknown
  # name makes `scrub!` raise Loofah::ScrubberNotFound.
  Loofah.fragment( value ).scrub!( :nofollow ).to_s
end
156+
# Scrubber behind {#none_html_sanitize}: keeps text nodes and replaces
# every other node with its children.
class NoneScrubber < Loofah::Scrubber
  def initialize
    # Ask Loofah to walk the tree from the leaves upwards.
    @direction = :bottom_up
  end

  # @param [Nokogiri::XML::Node] node currently being visited.
  def scrub( node )
    return Loofah::Scrubber::CONTINUE if node.text?

    # Move the children in front of the node before dropping it so
    # nested text is kept.
    node.before( node.children )
    node.remove
  end
end
171+
# Base scrubber for allow-list style filtering. Subclasses decide which
# elements stay by overriding {#allowed_element?}.
#
# Allowed elements have their attributes scrubbed. Other elements are
# removed; their children are kept unless the element is one of
# {UNSAFE_TAGS}.
class PermitScrubber < Loofah::Scrubber
  def initialize
    # Ask Loofah to walk the tree from the leaves upwards.
    @direction = :bottom_up
  end

  # @param [Nokogiri::XML::Node] node currently being visited.
  # @return [Object] always `Loofah::Scrubber::CONTINUE`.
  def scrub( node )
    # Only element nodes are filtered; everything else passes through.
    return Loofah::Scrubber::CONTINUE unless node.type == Nokogiri::XML::Node::ELEMENT_NODE

    tag = node.name

    if allowed_element?( tag )
      Loofah::HTML5::Scrub.scrub_attributes( node )
    else
      # Keep the content of harmless tags, drop unsafe ones entirely.
      node.before( node.children ) unless unsafe_element?( tag )
      node.remove
    end

    Loofah::Scrubber::CONTINUE
  end

  # Hook for subclasses. The base implementation has an empty body and
  # so returns nil, which permits no element.
  #
  # @param [String] name of the element.
  def allowed_element?( name )
  end

  # @param [String] name of the element.
  # @return [Boolean] true if the upper-cased name is in {UNSAFE_TAGS}.
  def unsafe_element?( name )
    tag = name.upcase
    UNSAFE_TAGS.include?( tag )
  end
end
197+
# Permits only the tags listed in {SIMPLE_TAGS}.
class SimpleScrubber < PermitScrubber
  # @param [String] name of the element.
  # @return [Boolean] true if the upper-cased name is in {SIMPLE_TAGS}.
  def allowed_element?( name )
    tag = name.upcase
    SIMPLE_TAGS.include?( tag )
  end
end
203+
# Permits only the tags listed in {BODY_TAGS}.
class BodyScrubber < PermitScrubber
  # @param [String] name of the element.
  # @return [Boolean] true if the upper-cased name is in {BODY_TAGS}.
  def allowed_element?( name )
    tag = name.upcase
    BODY_TAGS.include?( tag )
  end
end
209+
# Permits every tag except those reported by `unsafe_element?`.
class SafeScrubber < PermitScrubber
  # @param [String] name of the element.
  # @return [Boolean] false for unsafe elements, true otherwise.
  def allowed_element?( name )
    return false if unsafe_element?( name )

    true
  end
end
215+
216+ end
217+ end
218+ end
0 commit comments