Skip to content

Commit 0b9f7b4

Browse files
authored
Handle reader comments in collections (#214)
* Handle reader comments in collections [WIP]
* Slightly different approach to remove comments
* A few more test cases
1 parent fee21ae commit 0b9f7b4

File tree

2 files changed

+100
-29
lines changed

2 files changed

+100
-29
lines changed

src/basilisp/reader.py

Lines changed: 61 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@
6161
_VECTOR = symbol.symbol('vector', ns='basilisp.core')
6262

6363

64+
class Comment:
    """Marker type for a form the reader discarded.

    The module-level COMMENT instance of this class is returned by the
    reader in place of a form dropped via the `#_` reader macro, so that
    callers can tell "nothing was read here" apart from a real value.
    """
66+
67+
68+
COMMENT = Comment()
69+
70+
LispReaderForm = Union[LispForm, Comment]
71+
72+
6473
class SyntaxError(Exception):  # pylint:disable=redefined-builtin
    """Raised by the reader when the input stream is not well formed.

    Deliberately shadows the builtin of the same name inside this module
    (hence the pylint pragma); it derives from Exception, not from the
    builtin SyntaxError.
    """
6675

@@ -298,8 +307,10 @@ def _read_namespaced(ctx: ReaderContext, allowed_suffix: Optional[str] = None) -
298307
return ns_str, name_str
299308

300309

301-
def _read_coll(ctx: ReaderContext, f: Callable[[Collection[Any]], Union[
302-
llist.List, lset.Set, vector.Vector]], end_token: str, coll_name: str):
310+
def _read_coll(ctx: ReaderContext,
311+
f: Callable[[Collection[Any]], Union[llist.List, lset.Set, vector.Vector]],
312+
end_token: str,
313+
coll_name: str):
303314
"""Read a collection from the input stream and create the
304315
collection using f."""
305316
coll: List = []
@@ -315,6 +326,8 @@ def _read_coll(ctx: ReaderContext, f: Callable[[Collection[Any]], Union[
315326
reader.next_token()
316327
return f(coll)
317328
elem = _read_next(ctx)
329+
if elem is COMMENT:
330+
continue
318331
coll.append(elem)
319332

320333

@@ -350,8 +363,8 @@ def _read_interop(ctx: ReaderContext, end_token: str) -> llist.List:
350363

351364
token = reader.peek()
352365
if whitespace_chars.match(token):
353-
instance = _read_next(ctx)
354-
member = _read_next(ctx)
366+
instance = _read_next_consuming_comment(ctx)
367+
member = _read_next_consuming_comment(ctx)
355368
if not isinstance(member, symbol.Symbol):
356369
raise SyntaxError(f"Expected Symbol; found {type(member)}")
357370
is_property = member.name.startswith('-')
@@ -367,17 +380,17 @@ def _read_interop(ctx: ReaderContext, end_token: str) -> llist.List:
367380
seq.append(_INTEROP_PROP)
368381
if whitespace_chars.match(reader.peek()):
369382
raise SyntaxError(f"Expected Symbol; found whitespace")
370-
member = _read_next(ctx)
383+
member = _read_next_consuming_comment(ctx)
371384
if not isinstance(member, symbol.Symbol):
372385
raise SyntaxError(f"Expected Symbol; found {type(member)}")
373-
instance = _read_next(ctx)
386+
instance = _read_next_consuming_comment(ctx)
374387
seq.append(instance)
375388
seq.append(member)
376389
else:
377390
assert not whitespace_chars.match(token)
378391
seq.append(_INTEROP_CALL)
379-
member = _read_next(ctx)
380-
instance = _read_next(ctx)
392+
member = _read_next_consuming_comment(ctx)
393+
instance = _read_next_consuming_comment(ctx)
381394
if not isinstance(member, symbol.Symbol):
382395
raise SyntaxError(f"Expected Symbol; found {type(member)}")
383396
seq.append(instance)
@@ -391,6 +404,8 @@ def _read_interop(ctx: ReaderContext, end_token: str) -> llist.List:
391404
reader.next_token()
392405
return llist.list(seq)
393406
elem = _read_next(ctx)
407+
if elem is COMMENT or isinstance(elem, Comment):
408+
continue
394409
seq.append(elem)
395410

396411

@@ -438,11 +453,17 @@ def _read_map(ctx: ReaderContext) -> lmap.Map:
438453
reader.next_token()
439454
break
440455
k = _read_next(ctx)
441-
if reader.peek() == '}':
442-
raise SyntaxError("Unexpected token '}'; expected map value")
443-
v = _read_next(ctx)
444-
if k in d:
445-
raise SyntaxError("Duplicate key '{}' in map literal".format(k))
456+
if k is COMMENT:
457+
continue
458+
while True:
459+
if reader.peek() == '}':
460+
raise SyntaxError("Unexpected token '}'; expected map value")
461+
v = _read_next(ctx)
462+
if v is COMMENT:
463+
continue
464+
if k in d:
465+
raise SyntaxError(f"Duplicate key '{k}' in map literal")
466+
break
446467
d[k] = v
447468

448469
return lmap.map(d)
@@ -613,7 +634,7 @@ def _read_meta(ctx: ReaderContext) -> lmeta.Meta:
613634
input stream."""
614635
start = ctx.reader.advance()
615636
assert start == '^'
616-
meta = _read_next(ctx)
637+
meta = _read_next_consuming_comment(ctx)
617638

618639
meta_map = None
619640
if isinstance(meta, symbol.Symbol):
@@ -626,7 +647,7 @@ def _read_meta(ctx: ReaderContext) -> lmeta.Meta:
626647
raise SyntaxError(
627648
f"Expected symbol, keyword, or map for metadata, not {type(meta)}")
628649

629-
obj_with_meta = _read_next(ctx)
650+
obj_with_meta = _read_next_consuming_comment(ctx)
630651
try:
631652
return obj_with_meta.with_meta(meta_map) # type: ignore
632653
except AttributeError:
@@ -684,7 +705,7 @@ def _read_quoted(ctx: ReaderContext) -> llist.List:
684705
"""Read a quoted form from the input stream."""
685706
start = ctx.reader.advance()
686707
assert start == "'"
687-
next_form = _read_next(ctx)
708+
next_form = _read_next_consuming_comment(ctx)
688709
return llist.l(_QUOTE, next_form)
689710

690711

@@ -788,7 +809,7 @@ def _read_syntax_quoted(ctx: ReaderContext) -> LispForm:
788809
assert start == "`"
789810

790811
with ctx.syntax_quoted():
791-
return _process_syntax_quoted_form(ctx, _read_next(ctx))
812+
return _process_syntax_quoted_form(ctx, _read_next_consuming_comment(ctx))
792813

793814

794815
def _read_unquote(ctx: ReaderContext) -> LispForm:
@@ -811,18 +832,18 @@ def _read_unquote(ctx: ReaderContext) -> LispForm:
811832
next_char = ctx.reader.peek()
812833
if next_char == '@':
813834
ctx.reader.advance()
814-
next_form = _read_next(ctx)
835+
next_form = _read_next_consuming_comment(ctx)
815836
return llist.l(_UNQUOTE_SPLICING, next_form)
816837
else:
817-
next_form = _read_next(ctx)
838+
next_form = _read_next_consuming_comment(ctx)
818839
return llist.l(_UNQUOTE, next_form)
819840

820841

821842
def _read_deref(ctx: ReaderContext) -> LispForm:
    """Read a derefed form from the input stream.

    Consumes the leading '@' and wraps the form that follows it in a
    list headed by _DEREF. Reader comments between the '@' and the
    target form are skipped rather than being dereferenced themselves.
    """
    start = ctx.reader.advance()
    assert start == "@"
    # Use the comment-skipping reader so a discarded form is never
    # mistaken for the deref target.
    next_form = _read_next_consuming_comment(ctx)
    return llist.l(_DEREF, next_form)
827848

828849

@@ -888,7 +909,7 @@ def _read_regex(ctx: ReaderContext) -> Pattern:
888909
raise SyntaxError(f"Unrecognized regex pattern syntax: {s}")
889910

890911

891-
def _read_reader_macro(ctx: ReaderContext) -> LispForm:
912+
def _read_reader_macro(ctx: ReaderContext) -> LispReaderForm:
892913
"""Return a data structure evaluated as a reader
893914
macro from the input stream."""
894915
start = ctx.reader.advance()
@@ -907,11 +928,11 @@ def _read_reader_macro(ctx: ReaderContext) -> LispForm:
907928
elif token == "_":
908929
ctx.reader.advance()
909930
_read_next(ctx) # Ignore the entire next form
910-
return _read_next(ctx)
931+
return COMMENT
911932
elif ns_name_chars.match(token):
912933
s = _read_sym(ctx)
913934
assert isinstance(s, symbol.Symbol)
914-
v = _read_next(ctx)
935+
v = _read_next_consuming_comment(ctx)
915936
if s in ctx.data_readers:
916937
f = ctx.data_readers[s]
917938
return f(v)
@@ -921,7 +942,7 @@ def _read_reader_macro(ctx: ReaderContext) -> LispForm:
921942
raise SyntaxError(f"Unexpected token '{token}' in reader macro")
922943

923944

924-
def _read_comment(ctx: ReaderContext) -> LispForm:
945+
def _read_comment(ctx: ReaderContext) -> LispReaderForm:
925946
"""Read (and ignore) a single-line comment from the input stream.
926947
Return the next form after the next line break."""
927948
reader = ctx.reader
@@ -937,8 +958,20 @@ def _read_comment(ctx: ReaderContext) -> LispForm:
937958
reader.advance()
938959

939960

940-
def _read_next(ctx: ReaderContext) -> LispForm: # noqa: C901
941-
"""Read the next full token from the input stream."""
961+
def _read_next_consuming_comment(ctx: ReaderContext) -> LispForm:
    """Read the next full form from the input stream, consuming any
    reader comments completely.

    Calls _read_next repeatedly until it yields something other than a
    Comment marker. The end-of-input sentinel is passed through
    unchanged, so callers may still receive it instead of a form.
    """
    while True:
        v = _read_next(ctx)
        if v is __EOF:
            return __EOF
        # COMMENT is itself a Comment instance, so the isinstance test
        # covers the singleton as well as any other marker instance.
        if isinstance(v, Comment):
            continue
        return v
971+
972+
973+
def _read_next(ctx: ReaderContext) -> LispReaderForm: # noqa: C901
974+
"""Read the next full form from the input stream."""
942975
reader = ctx.reader
943976
token = reader.peek()
944977
if token == '(':
@@ -1000,6 +1033,8 @@ def read(stream, resolver: Resolver = None, data_readers: DataReaders = None) ->
10001033
expr = _read_next(ctx)
10011034
if expr is __EOF:
10021035
return
1036+
if expr is COMMENT or isinstance(expr, Comment):
1037+
continue
10031038
yield expr
10041039

10051040

tests/basilisp/reader_test.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -677,9 +677,45 @@ def test_invalid_meta_attachment():
677677

678678

679679
def test_comment_reader_macro():
    """Exercise the `#_` reader macro at top level and inside each
    collection literal."""
    # A discarded form with nothing after it reads as None.
    assert None is read_str_first('#_ (a list)')
    assert None is read_str_first('#_1')
    assert None is read_str_first('#_"string"')
    assert None is read_str_first('#_:keyword')
    assert None is read_str_first('#_symbol')
    assert None is read_str_first('#_[]')
    assert None is read_str_first('#_{}')
    assert None is read_str_first('#_()')
    assert None is read_str_first('#_#{}')

    # Only the single form after `#_` is dropped.
    assert kw.keyword('kw2') == read_str_first('#_:kw1 :kw2')

    # Lists
    assert llist.List.empty() == read_str_first('(#_sym)')
    assert llist.l(sym.symbol('inc'), 5) == read_str_first('(inc #_counter 5)')
    assert llist.l(sym.symbol('dec'), 8) == read_str_first('(#_inc dec #_counter 8)')

    # Vectors
    assert vec.Vector.empty() == read_str_first('[#_m]')
    assert vec.v(1) == read_str_first('[#_m 1]')
    assert vec.v(1) == read_str_first('[#_m 1 #_2]')
    assert vec.v(1, 2) == read_str_first('[#_m 1 2]')
    assert vec.v(1, 4) == read_str_first('[#_m 1 #_2 4]')
    assert vec.v(1, 4) == read_str_first('[#_m 1 #_2 4 #_5]')

    # Sets
    assert lset.Set.empty() == read_str_first('#{#_m}')
    assert lset.s(1) == read_str_first('#{#_m 1}')
    assert lset.s(1) == read_str_first('#{#_m 1 #_2}')
    assert lset.s(1, 2) == read_str_first('#{#_m 1 2}')
    assert lset.s(1, 4) == read_str_first('#{#_m 1 #_2 4}')
    assert lset.s(1, 4) == read_str_first('#{#_m 1 #_2 4 #_5}')

    # Maps: a discarded form counts as neither a key nor a value.
    assert lmap.Map.empty() == read_str_first('{#_:key}')
    assert lmap.Map.empty() == read_str_first('{#_:key #_"value"}')
    assert lmap.map({kw.keyword('key'): "value"}) == read_str_first(
        '{:key #_"other" "value"}')
    assert lmap.map({kw.keyword('key'): "value"}) == read_str_first(
        '{:key "value" #_"other"}')

    # A key whose only "value" is discarded leaves the map unbalanced.
    with pytest.raises(reader.SyntaxError):
        read_str_first('{:key #_"value"}')
683719

684720

685721
def test_comment_line():

0 commit comments

Comments
 (0)