Skip to content

Commit a0a737c

Browse files
committed
Speed up parser by inlining char_at
Replicates graphql/graphql-js@8013c0d
1 parent: cf06bbf · commit: a0a737c

File tree

1 file changed: 53 additions (+53) and 72 deletions (-72).

graphql/language/lexer.py

Lines changed: 53 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -103,15 +103,8 @@ def desc(self) -> str:
103103
return f"{kind} {value!r}" if value else kind
104104

105105

106-
def char_at(s, pos):
107-
try:
108-
return s[pos]
109-
except IndexError:
110-
return None
111-
112-
113106
def print_char(char):
114-
return TokenKind.EOF.value if char is None else repr(char)
107+
return repr(char) if char else TokenKind.EOF.value
115108

116109

117110
_KIND_FOR_PUNCT = {
@@ -191,24 +184,23 @@ def read_token(self, prev: Token) -> Token:
191184
if pos >= body_length:
192185
return Token(TokenKind.EOF, body_length, body_length, line, col, prev)
193186

194-
char = char_at(body, pos)
195-
if char is not None:
196-
kind = _KIND_FOR_PUNCT.get(char)
197-
if kind:
198-
return Token(kind, pos, pos + 1, line, col, prev)
199-
if char == "#":
200-
return self.read_comment(pos, line, col, prev)
201-
elif char == ".":
202-
if char == char_at(body, pos + 1) == char_at(body, pos + 2):
203-
return Token(TokenKind.SPREAD, pos, pos + 3, line, col, prev)
204-
elif "A" <= char <= "Z" or "a" <= char <= "z" or char == "_":
205-
return self.read_name(pos, line, col, prev)
206-
elif "0" <= char <= "9" or char == "-":
207-
return self.read_number(pos, char, line, col, prev)
208-
elif char == '"':
209-
if char == char_at(body, pos + 1) == char_at(body, pos + 2):
210-
return self.read_block_string(pos, line, col, prev)
211-
return self.read_string(pos, line, col, prev)
187+
char = body[pos]
188+
kind = _KIND_FOR_PUNCT.get(char)
189+
if kind:
190+
return Token(kind, pos, pos + 1, line, col, prev)
191+
if char == "#":
192+
return self.read_comment(pos, line, col, prev)
193+
elif char == ".":
194+
if body[pos + 1 : pos + 3] == "..":
195+
return Token(TokenKind.SPREAD, pos, pos + 3, line, col, prev)
196+
elif "A" <= char <= "Z" or "a" <= char <= "z" or char == "_":
197+
return self.read_name(pos, line, col, prev)
198+
elif "0" <= char <= "9" or char == "-":
199+
return self.read_number(pos, char, line, col, prev)
200+
elif char == '"':
201+
if body[pos + 1 : pos + 3] == '""':
202+
return self.read_block_string(pos, line, col, prev)
203+
return self.read_string(pos, line, col, prev)
212204

213205
raise GraphQLSyntaxError(source, pos, unexpected_character_message(char))
214206

@@ -221,15 +213,15 @@ def position_after_whitespace(self, body: str, start_position: int) -> int:
221213
body_length = len(body)
222214
position = start_position
223215
while position < body_length:
224-
char = char_at(body, position)
225-
if char is not None and char in " \t,\ufeff":
216+
char = body[position]
217+
if char in " \t,\ufeff":
226218
position += 1
227219
elif char == "\n":
228220
position += 1
229221
self.line += 1
230222
self.line_start = position
231223
elif char == "\r":
232-
if char_at(body, position + 1) == "\n":
224+
if body[position + 1 : position + 2] == "\n":
233225
position += 2
234226
else:
235227
position += 1
@@ -245,8 +237,11 @@ def read_comment(self, start: int, line: int, col: int, prev: Token) -> Token:
245237
position = start
246238
while True:
247239
position += 1
248-
char = char_at(body, position)
249-
if char is None or (char < " " and char != "\t"):
240+
try:
241+
char = body[position]
242+
except IndexError:
243+
break
244+
if char < " " and char != "\t":
250245
break
251246
return Token(
252247
TokenKind.COMMENT,
@@ -271,32 +266,32 @@ def read_number(
271266
is_float = False
272267
if char == "-":
273268
position += 1
274-
char = char_at(body, position)
269+
char = body[position : position + 1]
275270
if char == "0":
276271
position += 1
277-
char = char_at(body, position)
278-
if char is not None and "0" <= char <= "9":
272+
char = body[position : position + 1]
273+
if "0" <= char <= "9":
279274
raise GraphQLSyntaxError(
280275
source,
281276
position,
282277
f"Invalid number, unexpected digit after 0: {print_char(char)}.",
283278
)
284279
else:
285280
position = self.read_digits(position, char)
286-
char = char_at(body, position)
281+
char = body[position : position + 1]
287282
if char == ".":
288283
is_float = True
289284
position += 1
290-
char = char_at(body, position)
285+
char = body[position : position + 1]
291286
position = self.read_digits(position, char)
292-
char = char_at(body, position)
293-
if char is not None and char in "Ee":
287+
char = body[position : position + 1]
288+
if char and char in "Ee":
294289
is_float = True
295290
position += 1
296-
char = char_at(body, position)
297-
if char is not None and char in "+-":
291+
char = body[position : position + 1]
292+
if char and char in "+-":
298293
position += 1
299-
char = char_at(body, position)
294+
char = body[position : position + 1]
300295
position = self.read_digits(position, char)
301296
return Token(
302297
TokenKind.FLOAT if is_float else TokenKind.INT,
@@ -313,9 +308,9 @@ def read_digits(self, start: int, char: str) -> int:
313308
source = self.source
314309
body = source.body
315310
position = start
316-
while char is not None and "0" <= char <= "9":
311+
while "0" <= char <= "9":
317312
position += 1
318-
char = char_at(body, position)
313+
char = body[position : position + 1]
319314
if position == start:
320315
raise GraphQLSyntaxError(
321316
source,
@@ -328,14 +323,15 @@ def read_string(self, start: int, line: int, col: int, prev: Token) -> Token:
328323
"""Read a string token from the source file."""
329324
source = self.source
330325
body = source.body
326+
body_length = len(body)
331327
position = start + 1
332328
chunk_start = position
333329
value: List[str] = []
334330
append = value.append
335331

336-
while position < len(body):
337-
char = char_at(body, position)
338-
if char is None or char in "\n\r":
332+
while position < body_length:
333+
char = body[position]
334+
if char in "\n\r":
339335
break
340336
if char == '"':
341337
append(body[chunk_start:position])
@@ -357,17 +353,12 @@ def read_string(self, start: int, line: int, col: int, prev: Token) -> Token:
357353
position += 1
358354
if char == "\\":
359355
append(body[chunk_start : position - 1])
360-
char = char_at(body, position)
356+
char = body[position : position + 1]
361357
escaped = _ESCAPED_CHARS.get(char)
362358
if escaped:
363359
value.append(escaped)
364-
elif char == "u":
365-
code = uni_char_code(
366-
char_at(body, position + 1),
367-
char_at(body, position + 2),
368-
char_at(body, position + 3),
369-
char_at(body, position + 4),
370-
)
360+
elif char == "u" and position + 4 < body_length:
361+
code = uni_char_code(*body[position + 1 : position + 5])
371362
if code < 0:
372363
escape = repr(body[position : position + 5])
373364
escape = escape[:1] + "\\" + escape[1:]
@@ -394,19 +385,14 @@ def read_string(self, start: int, line: int, col: int, prev: Token) -> Token:
394385
def read_block_string(self, start: int, line: int, col: int, prev: Token) -> Token:
395386
source = self.source
396387
body = source.body
388+
body_length = len(body)
397389
position = start + 3
398390
chunk_start = position
399391
raw_value = ""
400392

401-
while position < len(body):
402-
char = char_at(body, position)
403-
if char is None:
404-
break
405-
if (
406-
char == '"'
407-
and char_at(body, position + 1) == '"'
408-
and char_at(body, position + 2) == '"'
409-
):
393+
while position < body_length:
394+
char = body[position]
395+
if char == '"' and body[position + 1 : position + 3] == '""':
410396
raw_value += body[chunk_start:position]
411397
return Token(
412398
TokenKind.BLOCK_STRING,
@@ -429,18 +415,13 @@ def read_block_string(self, start: int, line: int, col: int, prev: Token) -> Tok
429415
self.line += 1
430416
self.line_start = position
431417
elif char == "\r":
432-
if char_at(body, position + 1) == "\n":
418+
if body[position + 1 : position + 2] == "\n":
433419
position += 2
434420
else:
435421
position += 1
436422
self.line += 1
437423
self.line_start = position
438-
elif (
439-
char == "\\"
440-
and char_at(body, position + 1) == '"'
441-
and char_at(body, position + 2) == '"'
442-
and char_at(body, position + 3) == '"'
443-
):
424+
elif char == "\\" and body[position + 1 : position + 4] == '"""':
444425
raw_value += body[chunk_start:position] + '"""'
445426
position += 4
446427
chunk_start = position
@@ -455,8 +436,8 @@ def read_name(self, start: int, line: int, col: int, prev: Token) -> Token:
455436
body_length = len(body)
456437
position = start + 1
457438
while position < body_length:
458-
char = char_at(body, position)
459-
if char is None or not (
439+
char = body[position]
440+
if not (
460441
char == "_"
461442
or "0" <= char <= "9"
462443
or "A" <= char <= "Z"

0 commit comments

Comments
 (0)