Skip to content

Commit b4fc101

Browse files
committed
stop running sanitization twice
1 parent 9332008 commit b4fc101

File tree

4 files changed

+20
-15
lines changed

4 files changed

+20
-15
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,9 @@ The `ConvertFilter` takes text and turns it into HTML. `@text`, `@config`, and `
171171

172172
### Sanitization
173173

174-
Because the web can be a scary place, HTML is automatically sanitized after the `ConvertFilter` runs and before the `NodeFilter`s are processed. This is to prevent malicious or unexpected input from entering the pipeline.
174+
Because the web can be a scary place, **HTML is automatically sanitized** after the `ConvertFilter` runs and before the `NodeFilter`s are processed. This is to prevent malicious or unexpected input from entering the pipeline.
175175

176-
The sanitization process takes a hash configuration of settings. See the [Selma](https://www.github.com/gjtorikian/selma) documentation for more information on how to configure these settings.
176+
The sanitization process takes a hash configuration of settings. See the [Selma](https://www.github.com/gjtorikian/selma) documentation for more information on how to configure these settings. Note that users must correctly configure the sanitization configuration if they expect to use it correctly in conjunction with handlers which manipulate HTML.
177177

178178
A default sanitization config is provided by this library (`HTMLPipeline::SanitizationFilter::DEFAULT_CONFIG`). A sample custom sanitization allowlist might look like this:
179179

lib/html_pipeline.rb

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -175,11 +175,20 @@ def call(text, context: {}, result: {})
175175
end
176176
end
177177

178-
unless @node_filters.empty?
178+
rewriter_options = {
179+
memory: {
180+
max_allowed_memory_usage: 5242880, # arbitrary limit of 5MB
181+
},
182+
}
183+
184+
if @node_filters.empty?
185+
instrument("sanitization.html_pipeline", payload) do
186+
result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, handlers: @node_filters, options: rewriter_options).rewrite(html)
187+
end unless @convert_filter.nil? # no html, so no sanitization
188+
else
179189
instrument("call_node_filters.html_pipeline", payload) do
180190
@node_filters.each { |filter| filter.context = (filter.context || {}).merge(context) }
181-
result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, handlers: @node_filters).rewrite(html)
182-
html = result[:output]
191+
result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, handlers: @node_filters, options: rewriter_options).rewrite(html)
183192
payload = default_payload({
184193
node_filters: @node_filters.map { |f| f.class.name },
185194
context: context,
@@ -188,10 +197,6 @@ def call(text, context: {}, result: {})
188197
end
189198
end
190199

191-
instrument("sanitization.html_pipeline", payload) do
192-
result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, handlers: @node_filters).rewrite(html)
193-
end
194-
195200
result = result.merge(@node_filters.collect(&:result).reduce({}, :merge))
196201
@node_filters.each(&:reset!)
197202

test/html_pipeline_test.rb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def test_context_is_carried_over_in_call
131131
# - yeH is bolded
132132
# - strikethroughs are rendered
133133
# - mentions are not linked
134-
assert_equal("<p><strong>yeH</strong>! I <em>think</em> <a href=\"/gjtorikian\">@gjtorikian</a> is <del>great</del>!</p>", result)
134+
assert_equal("<p><strong>yeH</strong>! I <em>think</em> <a href=\"/gjtorikian\" class=\"user-mention\">@gjtorikian</a> is <del>great</del>!</p>", result)
135135

136136
context = {
137137
bolded: false,
@@ -144,7 +144,7 @@ def test_context_is_carried_over_in_call
144144
# - yeH is not bolded
145145
# - strikethroughs are not rendered
146146
# - mentions are linked
147-
assert_equal("<p>yeH! I <em>think</em> <a href=\"http://your-domain.com/gjtorikian\">@gjtorikian</a> is ~great~!</p>", result_with_context)
147+
assert_equal("<p>yeH! I <em>think</em> <a href=\"http://your-domain.com/gjtorikian\" class=\"user-mention\">@gjtorikian</a> is ~great~!</p>", result_with_context)
148148
end
149149

150150
def test_text_filter_instance_context_is_carried_over_in_call
@@ -160,7 +160,7 @@ def test_text_filter_instance_context_is_carried_over_in_call
160160

161161
# note:
162162
# - yeH is not bolded due to previous context
163-
assert_equal("<p>yeH! I <em>think</em> <a href=\"/gjtorikian\">@gjtorikian</a> is <del>great</del>!</p>", result)
163+
assert_equal("<p>yeH! I <em>think</em> <a href=\"/gjtorikian\" class=\"user-mention\">@gjtorikian</a> is <del>great</del>!</p>", result)
164164
end
165165

166166
def test_convert_filter_instance_context_is_carried_over_in_call
@@ -176,7 +176,7 @@ def test_convert_filter_instance_context_is_carried_over_in_call
176176

177177
# note:
178178
# - strikethroughs are not rendered due to previous context
179-
assert_equal("<p><strong>yeH</strong>! I <em>think</em> <a href=\"/gjtorikian\">@gjtorikian</a> is <del>great</del>!</p>", result)
179+
assert_equal("<p><strong>yeH</strong>! I <em>think</em> <a href=\"/gjtorikian\" class=\"user-mention\">@gjtorikian</a> is <del>great</del>!</p>", result)
180180
end
181181

182182
def test_node_filter_instance_context_is_carried_over_in_call
@@ -192,6 +192,6 @@ def test_node_filter_instance_context_is_carried_over_in_call
192192

193193
# note:
194194
# - mentions are linked
195-
assert_equal("<p><strong>yeH</strong>! I <em>think</em> <a href=\"http://your-domain.com/gjtorikian\">@gjtorikian</a> is <del>great</del>!</p>", result)
195+
assert_equal("<p><strong>yeH</strong>! I <em>think</em> <a href=\"http://your-domain.com/gjtorikian\" class=\"user-mention\">@gjtorikian</a> is <del>great</del>!</p>", result)
196196
end
197197
end

test/sanitization_filter_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def test_sanitization_pipeline_can_be_configured
232232
CODE
233233

234234
expected = <<~HTML
235-
<p>This is great, <a>@balevine</a>:</p>
235+
<p>This is great, <a href="/balevine" class="user-mention">@balevine</a>:</p>
236236
<pre><code>some_code(:first)
237237
</code></pre>
238238
HTML

0 commit comments

Comments
 (0)