3939 BACKSLASH = '\\';
4040 # Could limit to hex here, but “bad unicode escape” on 0XXF is probably a
4141 # more helpful error than “unknown char”
42- UNICODE_ESCAPE = '\\u' [0-9A-Za-z]{4};
# One character of a unicode escape's digit run (deliberately wider than hex).
42+ UNICODE_DIGIT = [0-9A-Za-z];
# Fixed-width form: exactly four digits after the `\u` prefix.
43+ FOUR_DIGIT_UNICODE = UNICODE_DIGIT{4};
# Variable-width form: four or more digits between LCURLY and RCURLY
# (both defined outside this excerpt; presumably `{` and `}` -- confirm).
44+ N_DIGIT_UNICODE = LCURLY UNICODE_DIGIT{4,} RCURLY;
# A unicode escape is the `\u` prefix followed by either of the two forms above.
45+ UNICODE_ESCAPE = '\\u' (FOUR_DIGIT_UNICODE | N_DIGIT_UNICODE);
4346 # https://graphql.github.io/graphql-spec/June2018/#sec-String-Value
4447 STRING_ESCAPE = '\\' [\\/bfnrt];
4548 BLOCK_QUOTE = '"""';
@@ -131,7 +134,25 @@ module GraphQL
131134 # To avoid allocating more strings, this modifies the string passed into it
# Rewrites escape sequences inside `raw_string` using `gsub!`, so the
# caller's string object is mutated rather than copied.
#
# @param raw_string [String] the string to rewrite in place
# @return [nil] always; the result is the mutated `raw_string`
132135 def self.replace_escaped_characters_in_place(raw_string)
# First pass: every ESCAPES match is substituted via ESCAPES_REPLACE.
133136 raw_string.gsub!(ESCAPES, ESCAPES_REPLACE)
134- raw_string.gsub!(UTF_8, &UTF_8_REPLACE)
# Second pass: each UTF_8 match is replaced by the block's return value.
137+ raw_string.gsub!(UTF_8) do |_matched_str|
# Per the UTF_8 pattern, group 1 holds the four-digit form and group 2 the
# braced form; only one of them is set for a given match.
138+ codepoint_1 = ($1 || $2).to_i(16)
# Group 3 is an optional four-digit `\u` escape immediately following the first.
139+ codepoint_2 = $3
140+
141+ if codepoint_2
142+ codepoint_2 = codepoint_2.to_i(16)
143+ if (codepoint_1 >= 0xD800 && codepoint_1 <= 0xDBFF) && # leading surrogate
144+ (codepoint_2 >= 0xDC00 && codepoint_2 <= 0xDFFF) # trailing surrogate
145+ # A surrogate pair
# Combine the pair: the leading surrogate's offset from 0xD800 is scaled by
# 0x400, the trailing surrogate's offset from 0xDC00 is added, then 0x10000.
146+ combined = ((codepoint_1 - 0xD800) * 0x400) + (codepoint_2 - 0xDC00) + 0x10000
147+ [combined].pack('U'.freeze)
148+ else
149+ # Two separate code points
# Both escapes were consumed by one match, so both characters are emitted here.
150+ [codepoint_1].pack('U'.freeze) + [codepoint_2].pack('U'.freeze)
151+ end
152+ else
# NOTE(review): no range check is applied before packing; confirm how braced
# values above U+10FFFF and unpaired surrogates are meant to be handled.
153+ [codepoint_1].pack('U'.freeze)
154+ end
155+ end
# `gsub!` returns nil when nothing matched, so return nil unconditionally.
135156 nil
136157 end
137158
@@ -203,8 +224,8 @@ module GraphQL
203224 "\\t" => "\t",
204225 }
205226
206- UTF_8 = /\\u[\dAa-f]{4}/i
207- UTF_8_REPLACE = ->(m) { [m[-4..-1].to_i(16)].pack('U'.freeze) }
# Matches a `\u` escape, optionally followed directly by a second four-digit
# `\u` escape. Capture groups:
#   1 - the four-digit form
#   2 - the digits of the braced form (four or more)
#   3 - the digits of the optional trailing four-digit escape
# NOTE(review): as rendered here the pattern contains literal spaces and has no
# /x flag; these look like diff-rendering artifacts -- confirm against the file.
227+ UTF_8 = /\\u(?:( [\dAa-f]{4})|\{([\da-f]{4,})\})(?:\\u([\dAa-f]{4}))? /i
228+
208229
209230 VALID_STRING = /\A(?:[^\\]|#{ESCAPES}|#{UTF_8})*\z/o
210231
@@ -219,8 +240,7 @@ module GraphQL
219240 line_incr = value.count("\n")
220241 value = GraphQL::Language::BlockString.trim_whitespace(value)
221242 end
222- # TODO: replace with `String#match?` when we support only Ruby 2.4+
223- # (It's faster: https://bugs.ruby-lang.org/issues/8110)
243+
224244 if !value.valid_encoding? || !value.match?(VALID_STRING)
225245 meta[:tokens] << token = GraphQL::Language::Token.new(
226246 :BAD_UNICODE_ESCAPE,
0 commit comments