@@ -36,27 +36,45 @@ const (
3636func isHighSurrogate (r rune ) bool { return r >= surr1 && r <= 0xdbff }
3737func isLowSurrogate (r rune ) bool { return r >= surr2 && r <= 0xdfff }
3838
39- // Decode decodes the UTF16-encoded string to UTF-8 string. This function
40- // exhibits much better performance than the standard library counterpart.
41- // All credits go to: https://gist.github.com/skeeto/09f1410183d246f9b18cba95c4e602f0
39+ // Decode decodes the UTF16-encoded string to UTF-8 string using fast ASCII path.
40+ // This function exhibits much better performance than the standard library counterpart.
4241func Decode (p []uint16 ) string {
43- s := make ([]byte , 0 , 2 * len (p ))
42+ n := len (p )
43+ if n == 0 {
44+ return ""
45+ }
46+
47+ s := make ([]byte , 0 , n * 2 )
48+
4449 for i := 0 ; i < len (p ); i ++ {
45- r := rune (0xfffd )
50+ // ascii fast-path (0x0000–0x007F)
51+ if p [i ] <= 0x7F {
52+ s = append (s , byte (p [i ]))
53+ continue
54+ }
55+
4656 r1 := rune (p [i ])
47- if isHighSurrogate (r1 ) {
48- if i + 1 < len (p ) {
49- r2 := rune (p [i + 1 ])
50- if isLowSurrogate (r2 ) {
51- i ++
52- r = 0x10000 + (r1 - surr1 )<< 10 + (r2 - surr2 )
53- }
57+
58+ // surrogate pair handling
59+ if isHighSurrogate (r1 ) && i + 1 < n {
60+ r2 := rune (p [i + 1 ])
61+ if isLowSurrogate (r2 ) {
62+ i ++
63+ r := 0x10000 + (r1 - surr1 )<< 10 + (r2 - surr2 )
64+ s = utf8 .AppendRune (s , r )
65+ continue
5466 }
55- } else if ! isLowSurrogate (r ) {
56- r = r1
5767 }
58- s = utf8 .AppendRune (s , r )
68+
69+ // non-surrogate BMP code point or malformed surrogate
70+ if ! isLowSurrogate (r1 ) {
71+ s = utf8 .AppendRune (s , r1 )
72+ } else {
73+ // lone low surrogate to replacement char
74+ s = utf8 .AppendRune (s , utf8 .RuneError )
75+ }
5976 }
77+
6078 return string (s )
6179}
6280
0 commit comments