Skip to content

Commit fb28054

Browse files
authored
Fix a bug where unknown data readers in non-selected reader conditional branches prevented reading altogether (#1121)
Fixes #1118
1 parent 31405ef commit fb28054

File tree

4 files changed

+197
-50
lines changed

4 files changed

+197
-50
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1212
* Fix a bug where `basilisp test` command fails due to an invalid `argparse` configuration (#1119)
1313
* Fix a bug where `basilisp.walk/walk` (and any functions that depend on it) did not preserve collection metadata (#1123)
1414
* Fix a bug where the private `postwalk` implementation in the reader did not preserve collection metadata (#1123)
15+
* Fix a bug where unknown data reader tags in non-selected reader conditional branches prevented reading the forms (#1118)
1516

1617
## [v0.3.1]
1718
### Added

src/basilisp/lang/reader.py

Lines changed: 120 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
lrepr,
5252
)
5353
from basilisp.lang.source import format_source_context
54+
from basilisp.lang.tagged import TaggedLiteral, tagged_literal
5455
from basilisp.lang.typing import IterableLispForm, LispForm, ReaderForm
5556
from basilisp.lang.util import munge
5657
from basilisp.util import Maybe, partition
@@ -363,6 +364,7 @@ class ReaderContext:
363364
"_default_data_reader_fn",
364365
"_features",
365366
"_process_reader_cond",
367+
"_process_tagged_literals",
366368
"_reader",
367369
"_resolve",
368370
"_in_anon_fn",
@@ -389,6 +391,7 @@ def __init__( # pylint: disable=too-many-arguments
389391
self._process_reader_cond = process_reader_cond
390392
self._reader = reader
391393
self._resolve = Maybe(resolver).or_else_get(lambda x: x)
394+
self._process_tagged_literals: collections.deque[bool] = collections.deque([])
392395
self._in_anon_fn: collections.deque[bool] = collections.deque([])
393396
self._syntax_quoted: collections.deque[bool] = collections.deque([])
394397
self._gensym_env: collections.deque[GenSymEnvironment] = collections.deque([])
@@ -434,6 +437,19 @@ def is_in_anon_fn(self) -> bool:
434437
except IndexError:
435438
return False
436439

440+
@contextlib.contextmanager
441+
def process_tagged_literals(self, v: bool):
442+
self._process_tagged_literals.append(v)
443+
yield
444+
self._process_tagged_literals.pop()
445+
446+
@property
447+
def should_process_tagged_literals(self) -> bool:
448+
try:
449+
return self._process_tagged_literals[-1] is True
450+
except IndexError:
451+
return True
452+
437453
@property
438454
def gensym_env(self) -> GenSymEnvironment:
439455
return self._gensym_env[-1]
@@ -655,7 +671,7 @@ def _read_coll(
655671
continue
656672
elif _should_splice_reader_conditional(ctx, elem):
657673
assert isinstance(elem, ReaderConditional)
658-
selected_feature = elem.select_feature(ctx.reader_features)
674+
selected_feature = _select_reader_conditional_branch(ctx, elem)
659675
if selected_feature is ReaderConditional.FEATURE_NOT_PRESENT:
660676
continue
661677
elif isinstance(selected_feature, vec.PersistentVector):
@@ -726,7 +742,7 @@ def __read_map_elems(ctx: ReaderContext) -> Iterable[RawReaderForm]:
726742
continue
727743
elif _should_splice_reader_conditional(ctx, v):
728744
assert isinstance(v, ReaderConditional)
729-
selected_feature = v.select_feature(ctx.reader_features)
745+
selected_feature = _select_reader_conditional_branch(ctx, v)
730746
if selected_feature is ReaderConditional.FEATURE_NOT_PRESENT:
731747
continue
732748
elif isinstance(selected_feature, vec.PersistentVector):
@@ -1443,6 +1459,23 @@ def _read_numeric_constant(ctx: ReaderContext) -> float:
14431459
return c
14441460

14451461

1462+
def _select_reader_conditional_branch(
1463+
ctx: ReaderContext, reader_cond: ReaderConditional
1464+
) -> LispReaderForm:
1465+
"""Select the reader conditional branch by feature and then resolve any tagged
1466+
literals for the selected feature."""
1467+
1468+
def resolve_tagged_literals(form: LispReaderForm):
1469+
if isinstance(form, TaggedLiteral):
1470+
resolved = _postwalk(resolve_tagged_literals, form.form)
1471+
return _resolve_tagged_literal(ctx, form.tag, resolved)
1472+
return form
1473+
1474+
return _postwalk(
1475+
resolve_tagged_literals, reader_cond.select_feature(ctx.reader_features)
1476+
)
1477+
1478+
14461479
def _should_splice_reader_conditional(ctx: ReaderContext, form: LispReaderForm) -> bool:
14471480
"""Return True if and only if form is a ReaderConditional which should be spliced
14481481
into a surrounding collection context."""
@@ -1453,9 +1486,61 @@ def _should_splice_reader_conditional(ctx: ReaderContext, form: LispReaderForm)
14531486
)
14541487

14551488

1456-
def _read_reader_conditional_preserving(ctx: ReaderContext) -> ReaderConditional:
1457-
"""Read a reader conditional form and return the unprocessed reader
1458-
conditional object."""
1489+
def _read_reader_conditional_preserving(
1490+
ctx: ReaderContext, is_splicing: bool
1491+
) -> ReaderConditional:
1492+
"""Read a reader conditional form and return the reader conditional object."""
1493+
coll: list = []
1494+
reader = ctx.reader
1495+
while True:
1496+
char = reader.peek()
1497+
if char == "":
1498+
raise ctx.eof_error("Unexpected EOF in reader conditional")
1499+
if whitespace_chars.match(char):
1500+
reader.advance()
1501+
continue
1502+
if char == ")":
1503+
reader.next_char()
1504+
return ReaderConditional(llist.list(coll), is_splicing=is_splicing)
1505+
1506+
with ctx.process_tagged_literals(False):
1507+
elem = _read_next(ctx)
1508+
1509+
if elem is COMMENT or isinstance(elem, Comment):
1510+
continue
1511+
elif _should_splice_reader_conditional(ctx, elem):
1512+
assert isinstance(elem, ReaderConditional)
1513+
selected_feature = _select_reader_conditional_branch(ctx, elem)
1514+
if selected_feature is ReaderConditional.FEATURE_NOT_PRESENT:
1515+
continue
1516+
elif isinstance(selected_feature, vec.PersistentVector):
1517+
coll.extend(selected_feature)
1518+
else:
1519+
raise ctx.syntax_error(
1520+
"Expecting Vector for splicing reader conditional "
1521+
f"form; got {type(selected_feature)}"
1522+
)
1523+
else:
1524+
assert (
1525+
not isinstance(elem, ReaderConditional)
1526+
or not ctx.should_process_reader_cond
1527+
), "Reader conditionals must be processed if specified"
1528+
coll.append(elem)
1529+
1530+
1531+
def _read_reader_conditional(ctx: ReaderContext) -> LispReaderForm:
1532+
"""Read a reader conditional form and either return it or process it and
1533+
return the resulting form.
1534+
1535+
If the reader is not set to process the reader conditional, it will always
1536+
be returned as a ReaderConditional object.
1537+
1538+
If the reader is set to process reader conditionals, only non-splicing reader
1539+
conditionals are processed here. If no matching feature is found in a
1540+
non-splicing reader conditional, a comment will be emitted (which is ultimately
1541+
discarded downstream in the reader).
1542+
1543+
Splicing reader conditionals are processed in the respective collection readers."""
14591544
reader = ctx.reader
14601545
start = reader.advance()
14611546
assert start == "?"
@@ -1477,27 +1562,9 @@ def _read_reader_conditional_preserving(ctx: ReaderContext) -> ReaderConditional
14771562
f"Expected opening '(' for reader conditional; got '{open_char}'"
14781563
)
14791564

1480-
feature_list = _read_coll(ctx, llist.list, ")", "reader conditional")
1481-
assert isinstance(feature_list, llist.PersistentList)
1482-
return ReaderConditional(feature_list, is_splicing=is_splicing)
1483-
1484-
1485-
def _read_reader_conditional(ctx: ReaderContext) -> LispReaderForm:
1486-
"""Read a reader conditional form and either return it or process it and
1487-
return the resulting form.
1488-
1489-
If the reader is not set to process the reader conditional, it will always
1490-
be returned as a ReaderConditional object.
1491-
1492-
If the reader is set to process reader conditionals, only non-splicing reader
1493-
conditionals are processed here. If no matching feature is found in a
1494-
non-splicing reader conditional, a comment will be emitted (which is ultimately
1495-
discarded downstream in the reader).
1496-
1497-
Splicing reader conditionals are processed in the respective collection readers."""
1498-
reader_cond = _read_reader_conditional_preserving(ctx)
1565+
reader_cond = _read_reader_conditional_preserving(ctx, is_splicing)
14991566
if ctx.should_process_reader_cond and not reader_cond.is_splicing:
1500-
form = reader_cond.select_feature(ctx.reader_features)
1567+
form = _select_reader_conditional_branch(ctx, reader_cond)
15011568
return cast(
15021569
LispReaderForm,
15031570
COMMENT if form is ReaderConditional.FEATURE_NOT_PRESENT else form,
@@ -1544,9 +1611,32 @@ def _load_record_or_type(
15441611
raise ctx.syntax_error("Records may only be constructed from Vectors and Maps")
15451612

15461613

1614+
def _resolve_tagged_literal(
1615+
ctx: ReaderContext, s: sym.Symbol, v: RawReaderForm
1616+
) -> LispReaderForm:
1617+
"""Resolve a tagged literal into whatever value is returned by the associated data reader."""
1618+
data_reader = None
1619+
if s in ctx.data_readers:
1620+
data_reader = ctx.data_readers[s]
1621+
elif s in ReaderContext._DATA_READERS:
1622+
data_reader = ReaderContext._DATA_READERS[s]
1623+
1624+
if data_reader is not None:
1625+
try:
1626+
return data_reader(v)
1627+
except SyntaxError as e:
1628+
raise ctx.syntax_error(e.message).with_traceback(e.__traceback__) from None
1629+
elif s.ns is None and "." in s.name:
1630+
return _load_record_or_type(ctx, s, v)
1631+
else:
1632+
try:
1633+
return ctx.default_data_reader_fn(s, v)
1634+
except SyntaxError as e:
1635+
raise ctx.syntax_error(e.message).with_traceback(e.__traceback__) from None
1636+
1637+
15471638
def _read_reader_macro(ctx: ReaderContext) -> LispReaderForm: # noqa: MC0001
1548-
"""Return a data structure evaluated as a reader
1549-
macro from the input stream."""
1639+
"""Return a data structure evaluated as a reader macro from the input stream."""
15501640
start = ctx.reader.advance()
15511641
assert start == "#"
15521642
char = ctx.reader.peek()
@@ -1587,28 +1677,10 @@ def _read_reader_macro(ctx: ReaderContext) -> LispReaderForm: # noqa: MC0001
15871677

15881678
v = _read_next_consuming_comment(ctx)
15891679

1590-
data_reader = None
1591-
if s in ctx.data_readers:
1592-
data_reader = ctx.data_readers[s]
1593-
elif s in ReaderContext._DATA_READERS:
1594-
data_reader = ReaderContext._DATA_READERS[s]
1680+
if not ctx.should_process_tagged_literals:
1681+
return tagged_literal(s, v)
15951682

1596-
if data_reader is not None:
1597-
try:
1598-
return data_reader(v)
1599-
except SyntaxError as e:
1600-
raise ctx.syntax_error(e.message).with_traceback(
1601-
e.__traceback__
1602-
) from None
1603-
elif s.ns is None and "." in s.name:
1604-
return _load_record_or_type(ctx, s, v)
1605-
else:
1606-
try:
1607-
return ctx.default_data_reader_fn(s, v)
1608-
except SyntaxError as e:
1609-
raise ctx.syntax_error(e.message).with_traceback(
1610-
e.__traceback__
1611-
) from None
1683+
return _resolve_tagged_literal(ctx, s, v)
16121684

16131685
raise ctx.syntax_error(f"Unexpected char '{char}' in reader macro")
16141686

src/basilisp/lang/typing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ISeq,
2020
IType,
2121
)
22+
from basilisp.lang.tagged import TaggedLiteral
2223

2324
CompilerOpts = IPersistentMap[kw.Keyword, bool]
2425

@@ -48,7 +49,7 @@
4849
uuid.UUID,
4950
]
5051
PyCollectionForm = Union[dict, list, set, tuple]
51-
ReaderForm = Union[LispForm, IRecord, ISeq, IType, PyCollectionForm]
52+
ReaderForm = Union[LispForm, IRecord, ISeq, IType, PyCollectionForm, TaggedLiteral]
5253
SpecialForm = Union[llist.PersistentList, ISeq]
5354

5455

tests/basilisp/reader_test.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from basilisp.lang import vector as vec
2323
from basilisp.lang.exception import format_exception
2424
from basilisp.lang.interfaces import IPersistentSet
25+
from basilisp.lang.tagged import tagged_literal
2526

2627

2728
@pytest.fixture
@@ -1551,14 +1552,18 @@ class TestReaderConditional:
15511552
"#?(clj 1 :lpy 2 :default)",
15521553
],
15531554
)
1554-
def test_basic_form_syntax(self, v: str):
1555+
def test_invalid_basic_form_syntax(self, v: str):
15551556
with pytest.raises(reader.SyntaxError):
15561557
read_str_first(v)
15571558

15581559
def test_basic_form(self):
15591560
assert 2 == read_str_first("#?(:clj 1 :lpy 2 :default 3)")
1561+
assert 2 == read_str_first("#?(:clj #_1 1 #_:lpy :lpy 2 :default 3)")
15601562
assert 1 == read_str_first("#?(:default 1 :lpy 2)")
15611563
assert None is read_str_first("#?(:clj 1 :cljs 2)")
1564+
assert [[], (), {}, set()] == read_str_first(
1565+
"#?(:cljs #js [] :lpy #py [#py [] #py () #py {} #py #{}] :default [])"
1566+
)
15621567

15631568
def test_basic_form_preserving(self):
15641569
c = read_str_first("#?(:clj 1 :lpy 2 :default 3)", process_reader_cond=False)
@@ -1570,6 +1575,72 @@ def test_basic_form_preserving(self):
15701575
) == c.val_at(reader.READER_COND_FORM_KW)
15711576
assert "#?(:clj 1 :lpy 2 :default 3)" == c.lrepr()
15721577

1578+
def test_form_preserving_with_unknown_data_readers(self):
1579+
c = read_str_first(
1580+
"#?(:cljs #js [] :lpy #py [] :default [])", process_reader_cond=False
1581+
)
1582+
assert isinstance(c, reader.ReaderConditional)
1583+
assert not c.is_splicing
1584+
assert False is c.val_at(reader.READER_COND_SPLICING_KW)
1585+
assert llist.l(
1586+
kw.keyword("cljs"),
1587+
tagged_literal(sym.symbol("js"), vec.EMPTY),
1588+
kw.keyword("lpy"),
1589+
tagged_literal(sym.symbol("py"), vec.EMPTY),
1590+
kw.keyword("default"),
1591+
vec.EMPTY,
1592+
) == c.val_at(reader.READER_COND_FORM_KW)
1593+
assert "#?(:cljs #js [] :lpy #py [] :default [])" == c.lrepr()
1594+
1595+
def test_ignore_unknown_data_readers_in_non_selected_conditional(self):
1596+
v = read_str_first("#?(:cljs #js [] :default [])")
1597+
assert isinstance(v, vec.PersistentVector)
1598+
assert v == vec.EMPTY
1599+
1600+
@pytest.mark.parametrize(
1601+
"s,expected",
1602+
[
1603+
(
1604+
"#?(:cljs [#?(:lpy :py :default :other)] :default :none)",
1605+
kw.keyword("none"),
1606+
),
1607+
(
1608+
"#?(:lpy [#?(:lpy :py :default :other)] :default :none)",
1609+
vec.v(kw.keyword("py")),
1610+
),
1611+
(
1612+
"#?(:lpy [#?(:clj :py :default :other)] :default :none)",
1613+
vec.v(kw.keyword("other")),
1614+
),
1615+
(
1616+
"#?(:cljs [#?@(:clj [1 2] :default [3 4])] :default :none)",
1617+
kw.keyword("none"),
1618+
),
1619+
(
1620+
"#?(:lpy [#?@(:clj [1 2] :default [3 4])] :default :none)",
1621+
vec.v(3, 4),
1622+
),
1623+
(
1624+
"#?(:lpy [#?@(:clj [1 2] :cljs [3 4])] :default :none)",
1625+
vec.EMPTY,
1626+
),
1627+
(
1628+
"#?(#?@(:clj [:clj [1 2]] :lpy [:lpy [3 4]]) :default [])",
1629+
vec.v(3, 4),
1630+
),
1631+
(
1632+
"#?(#?@(:clj [:clj [1 2]] :lpy [:cljs [3 4]]) :default [])",
1633+
vec.EMPTY,
1634+
),
1635+
(
1636+
"#?(#?@(:clj [1 2]) :default :none)",
1637+
kw.keyword("none"),
1638+
),
1639+
],
1640+
)
1641+
def test_nested_reader_conditionals(self, s: str, expected):
1642+
assert expected == read_str_first(s)
1643+
15731644
@pytest.mark.parametrize(
15741645
"v",
15751646
[
@@ -1598,6 +1669,8 @@ def test_basic_form_preserving(self):
15981669
"#?@(:clj)",
15991670
"#?@(:clj [1] lpy)",
16001671
"#?@(clj [1] :lpy [2] :default)",
1672+
# Invalid splice collection (in nested reader conditional)
1673+
"#?(#?@(:lpy (:lpy [])) :default :none)",
16011674
],
16021675
)
16031676
def test_splicing_form_syntax(self, v: str):

0 commit comments

Comments
 (0)