2 files changed: 33 additions, 8 deletions.
@@ -82,16 +82,22 @@ def self.hangul_comp_one(string)
8282
8383 ## Canonical Ordering
# Reorder the combining characters of +string+ for canonical ordering.
#
# Every character whose CLASS_TABLE entry is 0 stays at its position. Each
# run of characters with a non-zero class between two such characters (or
# at either end of the string) is sorted by class; characters of equal
# class keep their original relative order.
#
# string:: the String to reorder; it is not modified.
# Returns a new String.
def self.canonical_ordering_one(string)
  result = +''    # unary plus keeps the buffer mutable even if literals are frozen
  unordered = []  # sort keys of the non-zero-class characters seen since the last class-0 one
  chars = string.chars
  n = chars.size
  chars.each_with_index do |char, i|
    ccc = CLASS_TABLE[char]
    if ccc == 0
      # A class-0 character ends the current run: emit the run in sorted
      # order, then the class-0 character itself.
      unordered.sort!.each { |key| result << chars[key % n] }
      unordered.clear
      result << char
    else
      # Pack class and position into a single Integer. Because i < n, the
      # keys sort by class first and by original position second, which
      # makes the sort stable; key % n recovers the position again.
      unordered << ccc * n + i
    end
  end
  # Emit a trailing run that was not followed by a class-0 character.
  unordered.sort!.each { |key| result << chars[key % n] }
  result
end
96102
97103 ## Normalization Forms for Patterns (not whole Strings)
@@ -209,4 +209,23 @@ def test_us_ascii
209209 assert_equal true , ascii_string . unicode_normalized? ( :nfkc )
210210 assert_equal true , ascii_string . unicode_normalized? ( :nfkd )
211211 end
212+
# Exercises UnicodeNormalize.canonical_ordering_one directly.
#
# The fixtures without an "_unordered" suffix are the outputs the
# assertions expect; the "_unordered" ones hold the same characters with
# the combining marks in a different order. The escape sequences must be
# adjacent (no spaces between them): any character placed between two
# marks would split the run that is supposed to be reordered.
def test_canonical_ordering
  a = "\u03B1\u0313\u0300\u0345"
  a_unordered1 = "\u03B1\u0345\u0313\u0300"
  a_unordered2 = "\u03B1\u0313\u0345\u0300"
  # u1 and u2 carry the same two marks in opposite orders; both are
  # expected to come back unchanged.
  u1 = "U\u0308\u0304"
  u2 = "U\u0304\u0308"
  s = "s\u0323\u0307"
  s_unordered = "s\u0307\u0323"
  # Expected to pass through unchanged wherever it appears.
  o = "\u{1611e}\u{1611e}\u{1611f}"
  # Actual cases called through String#unicode_normalize
  assert_equal(s + o, UnicodeNormalize.canonical_ordering_one(s_unordered + o))
  assert_equal(a[1..], UnicodeNormalize.canonical_ordering_one(a_unordered1[1..]))
  assert_equal(a[1..] + o, UnicodeNormalize.canonical_ordering_one(a_unordered2[1..] + o))
  # Artificial cases
  assert_equal(a + u1 + o + u2 + s, UnicodeNormalize.canonical_ordering_one(a + u1 + o + u2 + s))
  assert_equal(s[1..] + a + a, UnicodeNormalize.canonical_ordering_one(s_unordered[1..] + a_unordered1 + a_unordered2))
  assert_equal(o + s + u1 + a + o + a + u2 + o, UnicodeNormalize.canonical_ordering_one(o + s_unordered + u1 + a_unordered1 + o + a_unordered2 + u2 + o))
end
212231end
You can’t perform that action at this time.
0 commit comments