Skip to content

Commit 2af3efd

Browse files
Split parser into main entrypoint and two helpers (#211)
`parse` is now a thin entry point that calls `parse_binary`, which in turn calls `parse_unary` to parse the leading term; the parser is a bit easier to read as a result.
1 parent f8fdca6 commit 2af3efd

File tree

1 file changed

+38
-27
lines changed

1 file changed

+38
-27
lines changed

scrapscript.py

Lines changed: 38 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -351,7 +351,7 @@ class UnexpectedEOFError(ParseError):
351351

352352

353353
def parse_assign(tokens: typing.List[Token], p: float = 0) -> "Assign":
354-
assign = parse(tokens, p)
354+
assign = parse_binary(tokens, p)
355355
if isinstance(assign, Spread):
356356
return Assign(Var("..."), assign)
357357
if not isinstance(assign, Assign):
@@ -371,26 +371,26 @@ def gensym_reset() -> None:
371371
gensym_reset()
372372

373373

374-
def parse(tokens: typing.List[Token], p: float = 0) -> "Object":
374+
def parse_unary(tokens: typing.List[Token], p: float) -> "Object":
375375
if not tokens:
376376
raise UnexpectedEOFError("unexpected end of input")
377377
token = tokens.pop(0)
378378
l: Object
379379
if isinstance(token, IntLit):
380-
l = Int(token.value)
380+
return Int(token.value)
381381
elif isinstance(token, FloatLit):
382-
l = Float(token.value)
382+
return Float(token.value)
383383
elif isinstance(token, Name):
384384
# TODO: Handle kebab case vars
385-
l = Var(token.value)
385+
return Var(token.value)
386386
elif isinstance(token, VariantToken):
387387
# It needs to be higher than the precedence of the -> operator so that
388388
# we can match variants in MatchFunction
389389
# It needs to be higher than the precedence of the && operator so that
390390
# we can use #true() and #false() in boolean expressions
391391
# It needs to be higher than the precedence of juxtaposition so that
392392
# f #true() #false() is parsed as f(TRUE)(FALSE)
393-
l = Variant(token.value, parse(tokens, PS[""].pr + 1))
393+
return Variant(token.value, parse_binary(tokens, PS[""].pr + 1))
394394
elif isinstance(token, BytesLit):
395395
base = token.base
396396
if base == 85:
@@ -403,45 +403,48 @@ def parse(tokens: typing.List[Token], p: float = 0) -> "Object":
403403
l = Bytes(base64.b16decode(token.value))
404404
else:
405405
raise ParseError(f"unexpected base {base!r} in {token!r}")
406+
return l
406407
elif isinstance(token, StringLit):
407-
l = String(token.value)
408+
return String(token.value)
408409
elif token == Operator("..."):
409410
if tokens and isinstance(tokens[0], Name):
410411
name = tokens[0].value
411412
tokens.pop(0)
412-
l = Spread(name)
413+
return Spread(name)
413414
else:
414-
l = Spread()
415+
return Spread()
415416
elif token == Operator("|"):
416-
expr = parse(tokens, PS["|"].pr) # TODO: make this work for larger arities
417+
expr = parse_binary(tokens, PS["|"].pr) # TODO: make this work for larger arities
417418
if not isinstance(expr, Function):
418419
raise ParseError(f"expected function in match expression {expr!r}")
419420
cases = [MatchCase(expr.arg, expr.body)]
420421
while tokens and tokens[0] == Operator("|"):
421422
tokens.pop(0)
422-
expr = parse(tokens, PS["|"].pr) # TODO: make this work for larger arities
423+
expr = parse_binary(tokens, PS["|"].pr) # TODO: make this work for larger arities
423424
if not isinstance(expr, Function):
424425
raise ParseError(f"expected function in match expression {expr!r}")
425426
cases.append(MatchCase(expr.arg, expr.body))
426-
l = MatchFunction(cases)
427+
return MatchFunction(cases)
427428
elif isinstance(token, LeftParen):
428429
if isinstance(tokens[0], RightParen):
429430
l = Hole()
430431
else:
431432
l = parse(tokens)
432433
tokens.pop(0)
434+
return l
433435
elif isinstance(token, LeftBracket):
434436
l = List([])
435437
token = tokens[0]
436438
if isinstance(token, RightBracket):
437439
tokens.pop(0)
438440
else:
439-
l.items.append(parse(tokens, 2))
441+
l.items.append(parse_binary(tokens, 2))
440442
while not isinstance(tokens.pop(0), RightBracket):
441443
if isinstance(l.items[-1], Spread):
442444
raise ParseError("spread must come at end of list match")
443445
# TODO: Implement .. operator
444-
l.items.append(parse(tokens, 2))
446+
l.items.append(parse_binary(tokens, 2))
447+
return l
445448
elif isinstance(token, LeftBrace):
446449
l = Record({})
447450
token = tokens[0]
@@ -456,17 +459,21 @@ def parse(tokens: typing.List[Token], p: float = 0) -> "Object":
456459
# TODO: Implement .. operator
457460
assign = parse_assign(tokens, 2)
458461
l.data[assign.name.name] = assign.value
462+
return l
459463
elif token == Operator("-"):
460464
# Unary minus
461465
# Precedence was chosen to be higher than binary ops so that -a op
462466
# b is (-a) op b and not -(a op b).
463467
# Precedence was chosen to be higher than function application so that
464468
# -a b is (-a) b and not -(a b).
465-
r = parse(tokens, HIGHEST_PREC + 1)
466-
l = Binop(BinopKind.SUB, Int(0), r)
469+
r = parse_binary(tokens, HIGHEST_PREC + 1)
470+
return Binop(BinopKind.SUB, Int(0), r)
467471
else:
468472
raise ParseError(f"unexpected token {token!r}")
469473

474+
475+
def parse_binary(tokens: typing.List[Token], p: float) -> "Object":
476+
l: Object = parse_unary(tokens, p)
470477
while True:
471478
if not tokens:
472479
break
@@ -478,7 +485,7 @@ def parse(tokens: typing.List[Token], p: float = 0) -> "Object":
478485
pl, pr = prec.pl, prec.pr
479486
if pl < p:
480487
break
481-
l = Apply(l, parse(tokens, pr))
488+
l = Apply(l, parse_binary(tokens, pr))
482489
continue
483490
prec = PS[op.value]
484491
pl, pr = prec.pl, prec.pr
@@ -488,34 +495,38 @@ def parse(tokens: typing.List[Token], p: float = 0) -> "Object":
488495
if op == Operator("="):
489496
if not isinstance(l, Var):
490497
raise ParseError(f"expected variable in assignment {l!r}")
491-
l = Assign(l, parse(tokens, pr))
498+
l = Assign(l, parse_binary(tokens, pr))
492499
elif op == Operator("->"):
493-
l = Function(l, parse(tokens, pr))
500+
l = Function(l, parse_binary(tokens, pr))
494501
elif op == Operator("|>"):
495-
l = Apply(parse(tokens, pr), l)
502+
l = Apply(parse_binary(tokens, pr), l)
496503
elif op == Operator("<|"):
497-
l = Apply(l, parse(tokens, pr))
504+
l = Apply(l, parse_binary(tokens, pr))
498505
elif op == Operator(">>"):
499-
r = parse(tokens, pr)
506+
r = parse_binary(tokens, pr)
500507
varname = gensym()
501508
l = Function(Var(varname), Apply(r, Apply(l, Var(varname))))
502509
elif op == Operator("<<"):
503-
r = parse(tokens, pr)
510+
r = parse_binary(tokens, pr)
504511
varname = gensym()
505512
l = Function(Var(varname), Apply(l, Apply(r, Var(varname))))
506513
elif op == Operator("."):
507-
l = Where(l, parse(tokens, pr))
514+
l = Where(l, parse_binary(tokens, pr))
508515
elif op == Operator("?"):
509-
l = Assert(l, parse(tokens, pr))
516+
l = Assert(l, parse_binary(tokens, pr))
510517
elif op == Operator("@"):
511518
# TODO: revisit whether to use @ or . for field access
512-
l = Access(l, parse(tokens, pr))
519+
l = Access(l, parse_binary(tokens, pr))
513520
else:
514521
assert isinstance(op, Operator)
515-
l = Binop(BinopKind.from_str(op.value), l, parse(tokens, pr))
522+
l = Binop(BinopKind.from_str(op.value), l, parse_binary(tokens, pr))
516523
return l
517524

518525

526+
def parse(tokens: typing.List[Token]) -> "Object":
527+
return parse_binary(tokens, 0)
528+
529+
519530
@dataclass(eq=True, frozen=True, unsafe_hash=True)
520531
class Object:
521532
def __str__(self) -> str:

0 commit comments

Comments
 (0)