forked from rails/rails
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit 8194f8e
Improve
resolves rails#46569
```ruby
require "bundler/inline"
gemfile(true) do
source "https://rubygems.org"
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
gem "rails", github: "rails/rails", branch: "main"
gem "benchmark-ips"
end
require "active_support"
require "active_support/inflector/transliterate"
module ActiveSupport::Inflector
  # Benchmark candidate for +transliterate+ that short-circuits ASCII-only
  # input before any encoding work is done.
  #
  # string      - the String to transliterate.
  # replacement - forwarded to I18n.transliterate as +replacement:+
  #               (defaults to "?").
  # locale      - forwarded to I18n.transliterate as +locale:+.
  #
  # The argument is never handed back or re-encoded in place: the ASCII fast
  # path returns a copy, and the encoding steps below operate on copies.
  #
  # Raises ArgumentError when +string+ is not a String, or when its encoding
  # is not listed in ALLOWED_ENCODINGS_FOR_TRANSLITERATE.
  def transliterate_fast(string, replacement = "?", locale: nil)
    raise ArgumentError, "Can only transliterate strings. Received #{string.class.name}" unless string.is_a?(String)
    raise ArgumentError, "Cannot transliterate strings with #{string.encoding} encoding" unless ALLOWED_ENCODINGS_FOR_TRANSLITERATE.include?(string.encoding)

    # Fast path: ASCII-only input is returned as-is, but as a copy so that a
    # caller mutating the result (e.g. with gsub!) cannot alter its own input,
    # and so that a frozen input does not yield a frozen result.
    return string.dup if string.ascii_only?

    input_encoding = string.encoding

    # US-ASCII is a subset of UTF-8 so we'll force encoding as UTF-8 if
    # US-ASCII is given. This way we can let tidy_bytes handle the string
    # in the same way as we do for UTF-8. The retag is applied to a copy so
    # the caller's string keeps its encoding.
    string = string.dup.force_encoding(Encoding::UTF_8) if input_encoding == Encoding::US_ASCII

    # GB18030 is Unicode compatible but is not a direct mapping so needs to be
    # transcoded. Using invalid/undef :replace will result in loss of data in
    # the event of invalid characters, but since tidy_bytes will replace
    # invalid/undef with a "?" we're safe to do the same beforehand.
    # String#encode (non-bang) returns a new string, leaving the input intact.
    string = string.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) if input_encoding == Encoding::GB18030

    transliterated = I18n.transliterate(
      ActiveSupport::Multibyte::Unicode.tidy_bytes(string).unicode_normalize(:nfc),
      replacement: replacement,
      locale: locale
    )

    # Restore the string encoding of the input if it was not UTF-8.
    # Apply invalid/undef :replace as tidy_bytes does
    transliterated.encode!(input_encoding, invalid: :replace, undef: :replace) if input_encoding != transliterated.encoding

    transliterated
  end
end
# Benchmark inputs, keyed by the label printed above each report.
# Only the hash itself is frozen: the values are left unfrozen on purpose so
# the benchmarked methods receive ordinary mutable strings.
SCENARIOS = {
  "Empty" => "",
  "Single Space" => " ",
  "ASCII string" => "This is a normal ASCII String.",
  "US ASCII encoded String" => String.new("This is a normal ASCII String.", encoding: Encoding::US_ASCII),
  "Very Long String" => "Very Long String :)" * 100,
  "Very Long french String" => "Very Long Stringé :)" * 100,
  "French string" => "Ceci est une chaîne de test pour la méthode de translittération.",
  "UTF-8 encoded Chinese string" => String.new("這是音譯方法的測試字符串", encoding: Encoding::UTF_8)
}.freeze
# Run one benchmark-ips comparison per scenario, preceded by a banner that
# centres the scenario label in an 80-column line of "=" characters.
SCENARIOS.each do |label, input|
  banner = " #{label} ".center(80, "=")
  puts "", banner, ""

  Benchmark.ips do |bench|
    bench.report("transliterate") do
      ActiveSupport::Inflector.transliterate(input)
    end
    bench.report("transliterate_fast") do
      ActiveSupport::Inflector.transliterate_fast(input)
    end
    # Print the relative-speed comparison between the two reports.
    bench.compare!
  end
end
```
```txt
==================================== Empty =====================================
Warming up --------------------------------------
transliterate 65.500k i/100ms
transliterate_fast 687.485k i/100ms
Calculating -------------------------------------
transliterate 657.632k (± 0.8%) i/s - 3.340M in 5.079936s
transliterate_fast 6.869M (± 1.6%) i/s - 34.374M in 5.005813s
Comparison:
transliterate_fast: 6868816.3 i/s
transliterate: 657631.8 i/s - 10.44x (± 0.00) slower
================================= Single Space =================================
Warming up --------------------------------------
transliterate 62.732k i/100ms
transliterate_fast 678.223k i/100ms
Calculating -------------------------------------
transliterate 628.475k (± 0.8%) i/s - 3.199M in 5.090978s
transliterate_fast 6.799M (± 0.2%) i/s - 34.589M in 5.087534s
Comparison:
transliterate_fast: 6798890.3 i/s
transliterate: 628475.2 i/s - 10.82x (± 0.00) slower
================================= ASCII string =================================
Warming up --------------------------------------
transliterate 32.095k i/100ms
transliterate_fast 687.396k i/100ms
Calculating -------------------------------------
transliterate 319.529k (± 0.8%) i/s - 1.605M in 5.022547s
transliterate_fast 6.879M (± 0.3%) i/s - 35.057M in 5.096224s
Comparison:
transliterate_fast: 6879113.6 i/s
transliterate: 319528.9 i/s - 21.53x (± 0.00) slower
=========================== US ASCII encoded String ============================
Warming up --------------------------------------
transliterate 33.027k i/100ms
transliterate_fast 688.354k i/100ms
Calculating -------------------------------------
transliterate 330.268k (± 0.9%) i/s - 1.651M in 5.000445s
transliterate_fast 6.861M (± 0.8%) i/s - 34.418M in 5.016963s
Comparison:
transliterate_fast: 6860726.0 i/s
transliterate: 330267.8 i/s - 20.77x (± 0.00) slower
=============================== Very Long String ===============================
Warming up --------------------------------------
transliterate 985.000 i/100ms
transliterate_fast 672.674k i/100ms
Calculating -------------------------------------
transliterate 9.899k (± 0.5%) i/s - 50.235k in 5.074820s
transliterate_fast 6.729M (± 0.7%) i/s - 34.306M in 5.098807s
Comparison:
transliterate_fast: 6728668.4 i/s
transliterate: 9899.2 i/s - 679.72x (± 0.00) slower
=========================== Very Long french String ============================
Warming up --------------------------------------
transliterate 671.000 i/100ms
transliterate_fast 671.000 i/100ms
Calculating -------------------------------------
transliterate 6.635k (± 1.9%) i/s - 33.550k in 5.058424s
transliterate_fast 6.622k (± 1.7%) i/s - 33.550k in 5.068289s
Comparison:
transliterate: 6634.9 i/s
transliterate_fast: 6621.7 i/s - same-ish: difference falls within error
================================ French string =================================
Warming up --------------------------------------
transliterate 14.726k i/100ms
transliterate_fast 14.679k i/100ms
Calculating -------------------------------------
transliterate 145.933k (± 1.5%) i/s - 736.300k in 5.046537s
transliterate_fast 146.753k (± 1.2%) i/s - 733.950k in 5.001937s
Comparison:
transliterate_fast: 146752.8 i/s
transliterate: 145933.1 i/s - same-ish: difference falls within error
========================= UTF-8 encoded Chinese string =========================
Warming up --------------------------------------
transliterate 13.905k i/100ms
transliterate_fast 14.093k i/100ms
Calculating -------------------------------------
transliterate 141.222k (± 1.9%) i/s - 709.155k in 5.023366s
transliterate_fast 140.510k (± 1.7%) i/s - 704.650k in 5.016400s
Comparison:
transliterate: 141221.9 i/s
transliterate_fast: 140510.4 i/s - same-ish: difference falls within error
```
`ActiveSupport::Inflector.transliterate` performance
1 parent fae0bae · commit 8194f8e (Copy full SHA for 8194f8e)
File tree
Expand file treeCollapse file tree
1 file changed
+3
-1
lines changedFilter options
- activesupport/lib/active_support/inflector
Expand file treeCollapse file tree
1 file changed
+3
-1
lines changedactivesupport/lib/active_support/inflector/transliterate.rb
Copy file name to clipboardExpand all lines: activesupport/lib/active_support/inflector/transliterate.rb+3-1Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
62 | 62 |
| |
63 | 63 |
| |
64 | 64 |
| |
65 |
| - | |
66 | 65 |
| |
67 | 66 |
| |
68 | 67 |
| |
| 68 | + | |
| 69 | + | |
| 70 | + | |
69 | 71 |
| |
70 | 72 |
| |
71 | 73 |
| |
|
0 commit comments