Skip to content

Commit a31ac87

Browse files
committed
ERB::Util.html_escape stop trying to tidy bytes
This call to `Unicode.tidy_bytes` was introduced 10 years ago in rails#19992, but that pull request was merged by mistake and was supposed to be reverted. Semantically, it makes no sense to deal with invalid strings at that layer, and performance-wise it imposes a massive overhead. ``` ruby 3.4.2 (2025-02-15 revision d2930f8e7a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- current 504.509k i/100ms no_tidy 1.749M i/100ms Calculating ------------------------------------- current 5.607M (± 1.0%) i/s (178.34 ns/i) - 28.253M in 5.038946s no_tidy 21.792M (± 0.6%) i/s (45.89 ns/i) - 110.211M in 5.057658s Comparison: current: 5607354.4 i/s no_tidy: 21791597.8 i/s - 3.89x faster ruby 3.4.2 (2025-02-15 revision d2930f8e7a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- current 261.902k i/100ms no_tidy 518.277k i/100ms Calculating ------------------------------------- current 2.795M (± 1.3%) i/s (357.72 ns/i) - 14.143M in 5.060105s no_tidy 5.508M (± 0.2%) i/s (181.55 ns/i) - 27.987M in 5.081000s Comparison: current: 2795448.7 i/s no_tidy: 5508171.9 i/s - 1.97x faster ``` ```ruby require "bundler/inline" gemfile do gem "rails" gem "benchmark-ips" end require "active_support/all" require "active_support/core_ext/erb/util" module ERB::Util def self.html_escape_no_tidy(s) # :nodoc: s = s.to_s if s.html_safe? s else unwrapped_html_escape(s) end end end Benchmark.ips do |x| s = "Hello World" x.report("current") { ERB::Util.html_escape(s) } x.report("no_tidy") { ERB::Util.html_escape_no_tidy(s) } x.compare!(order: :baseline) end Benchmark.ips do |x| s = "Hello World" * 20 x.report("current") { ERB::Util.html_escape(s) } x.report("no_tidy") { ERB::Util.html_escape_no_tidy(s) } x.compare!(order: :baseline) end ```
1 parent 67e7f9e commit a31ac87

File tree

2 files changed

+2
-14
lines changed

2 files changed

+2
-14
lines changed

activesupport/lib/active_support/core_ext/erb/util.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def html_escape(s) # :nodoc:
1212
if s.html_safe?
1313
s
1414
else
15-
super(ActiveSupport::Multibyte::Unicode.tidy_bytes(s))
15+
super(s)
1616
end
1717
end
1818
alias :unwrapped_html_escape :html_escape # :nodoc:
@@ -61,7 +61,7 @@ module Util
6161
# html_escape_once('<< Accept & Checkout')
6262
# # => "&lt;&lt; Accept &amp; Checkout"
6363
def html_escape_once(s)
64-
ActiveSupport::Multibyte::Unicode.tidy_bytes(s.to_s).gsub(HTML_ESCAPE_ONCE_REGEXP, HTML_ESCAPE).html_safe
64+
s.to_s.gsub(HTML_ESCAPE_ONCE_REGEXP, HTML_ESCAPE).html_safe
6565
end
6666

6767
module_function :html_escape_once

activesupport/test/core_ext/string_ext_test.rb

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,12 +1102,6 @@ def to_s
11021102
assert_equal expected, ERB::Util.html_escape(string)
11031103
end
11041104

1105-
test "ERB::Util.html_escape should correctly handle invalid UTF-8 strings" do
1106-
string = "\251 <"
1107-
expected = "© &lt;"
1108-
assert_equal expected, ERB::Util.html_escape(string)
1109-
end
1110-
11111105
test "ERB::Util.html_escape should not escape safe strings" do
11121106
string = "<b>hello</b>".html_safe
11131107
assert_equal string, ERB::Util.html_escape(string)
@@ -1121,12 +1115,6 @@ def to_s
11211115
assert_equal escaped_string, ERB::Util.html_escape_once(escaped_string)
11221116
end
11231117

1124-
test "ERB::Util.html_escape_once should correctly handle invalid UTF-8 strings" do
1125-
string = "\251 <"
1126-
expected = "© &lt;"
1127-
assert_equal expected, ERB::Util.html_escape_once(string)
1128-
end
1129-
11301118
test "ERB::Util.xml_name_escape should escape unsafe characters for XML names" do
11311119
unsafe_char = ">"
11321120
safe_char = "Á"

0 commit comments

Comments
 (0)