Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Added the `basilisp.url` namespace for structured URL manipulation (#1239)
* Added support for proxies (#425)
* Added a `:slots` meta flag for `deftype` to disable creation of `__slots__` on created types (#1241)
* Added support for f-strings (#922)

### Changed
* Removed implicit support for single-use iterables in sequences, and introduced `iterator-seq` to explicitly handle them (#1192)
Expand Down
22 changes: 22 additions & 0 deletions docs/reader.rst
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,28 @@ Their meanings match the equivalent escape sequences supported in `Python string

:ref:`strings_and_byte_strings`

.. _reader_f_strings:

f-strings
^^^^^^^^^

::

basilisp.user=> #f ""
""
basilisp.user=> (let [a 1] #f "this is a string with {(inc a)}")
"this is a string with 2"
basilisp.user=> (let [a 1] #f "this is a string with \{(inc a)}")
"this is a string with {(inc a)}"

f-strings are denoted as a series of characters enclosed by ``"`` quotation marks and preceded by a ``#f``.
Expressions enclosed in ``{}`` are interpolated into the string.
Each interpolation must contain exactly one expression, which may be surrounded by optional whitespace characters that will not be included in the final string.
Any valid expression may appear in a string interpolation, including another string.
To include a literal opening ``{`` character, it must be escaped as ``\{``.

f-strings are otherwise identical to standard :ref:`string literals <reader_strings>`.

.. _reader_byte_strings:

Byte Strings
Expand Down
72 changes: 60 additions & 12 deletions src/basilisp/lang/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
_HASH_SET = sym.symbol("hash-set", ns="basilisp.core")
_LIST = sym.symbol("list", ns="basilisp.core")
_SEQ = sym.symbol("seq", ns="basilisp.core")
_STR = sym.symbol("str", ns="basilisp.core")
_UNQUOTE = sym.symbol("unquote", ns="basilisp.core")
_UNQUOTE_SPLICING = sym.symbol("unquote-splicing", ns="basilisp.core")
_VECTOR = sym.symbol("vector", ns="basilisp.core")
Expand Down Expand Up @@ -599,6 +600,14 @@ def with_lineno_and_col(ctx, **kwargs):
return cast(W, with_lineno_and_col)


def _consume_whitespace(ctx: ReaderContext) -> str:
    """Skip over whitespace in the input stream.

    Starting from the reader's current character, advance the reader for as
    long as the character matches `whitespace_chars`.

    Returns the first character which does not match `whitespace_chars`; the
    reader is left positioned on that character.
    """
    stream = ctx.reader
    current = stream.peek()
    while True:
        if not whitespace_chars.match(current):
            return current
        current = stream.next_char()


def _read_namespaced(
ctx: ReaderContext, allowed_suffix: Optional[str] = None
) -> tuple[Optional[str], str]:
Expand Down Expand Up @@ -831,9 +840,7 @@ def _read_namespaced_map(ctx: ReaderContext) -> lmap.PersistentMap:
"be specified as keywords without namespaces"
)

char = ctx.reader.peek()
while whitespace_chars.match(char):
char = ctx.reader.next_char()
_consume_whitespace(ctx)

return _read_map(ctx, namespace=map_ns)

Expand Down Expand Up @@ -966,6 +973,49 @@ def _read_str(ctx: ReaderContext, allow_arbitrary_escapes: bool = False) -> str:
s.append(char)


def _read_fstr(ctx: ReaderContext) -> Union[str, llist.PersistentList]:
    """Read an f-string literal (``#f "..."``) from the input stream.

    Whitespace between the ``#f`` dispatch symbol and the opening quote is
    skipped. Text outside of ``{...}`` is collected as literal string segments;
    each ``{...}`` must contain exactly one form (optionally surrounded by
    whitespace) which is read with `_read_next`. A literal ``{`` is written as
    ``\\{``; the other escape sequences are those in `_STR_ESCAPE_CHARS`.

    Returns a plain string if the literal contains no interpolations. Otherwise
    returns the list form ``(basilisp.core/str seg-or-expr ...)`` containing the
    string segments and interpolated forms in source order.

    Raises a syntax error if the next non-whitespace character is not ``"``, if
    an unknown escape sequence is found, or if an interpolation is not closed by
    ``}`` after a single form. Raises an EOF error if the input ends before the
    closing quote."""
    elems: list[LispReaderForm] = []
    s: list[str] = []
    reader = ctx.reader

    # The loop below starts by advancing *past* the current character, so the
    # opening quote must be verified here (as `_read_byte_str` does); otherwise
    # input such as `#f [abc"` would silently be read as the string "abc".
    char = _consume_whitespace(ctx)
    if char != '"':
        raise ctx.syntax_error(f"Expected '\"'; got '{char}' instead")

    while True:
        char = reader.next_char()
        if char == "":
            raise ctx.eof_error("Unexpected EOF in string")
        if char == "\\":
            char = reader.next_char()
            escape_char = _STR_ESCAPE_CHARS.get(char)
            if escape_char is not None:
                s.append(escape_char)
                continue
            # `\{` is specific to f-strings: it yields a literal brace rather
            # than opening an interpolation.
            if char == "{":
                s.append(char)
                continue
            raise ctx.syntax_error(f"Unknown escape sequence: \\{char}")
        if char == '"':
            # Step past the closing quote so the reader is positioned on the
            # character following the literal.
            reader.next_char()
            elems.append("".join(s))
            if all(isinstance(elem, str) for elem in elems):
                return "".join(cast(list[str], elems))
            else:
                return llist.list([_STR, *elems])
        if char == "{":
            # Step past the opening brace, flush the pending literal segment,
            # then read exactly one form.
            reader.next_char()
            elems.append("".join(s))
            s = []
            expr = _read_next(ctx)
            elems.append(expr)
            # The reader is left on the closing brace; the `next_char()` at the
            # top of the loop steps past it.
            char = _consume_whitespace(ctx)
            if char != "}":
                raise ctx.syntax_error("Expected single expression in f-string")
            continue

        s.append(char)


_BYTES_ESCAPE_CHARS = {
'"': b'"',
"\\": b"\\",
Expand Down Expand Up @@ -1000,9 +1050,7 @@ def _read_byte_str(ctx: ReaderContext) -> bytes:
"""
reader = ctx.reader

char = reader.peek()
while whitespace_chars.match(char):
char = reader.next_char()
char = _consume_whitespace(ctx)

if char != '"':
raise ctx.syntax_error(f"Expected '\"'; got '{char}' instead")
Expand Down Expand Up @@ -1681,8 +1729,11 @@ def _read_reader_macro(ctx: ReaderContext) -> LispReaderForm:
elif ns_name_chars.match(char):
s = _read_sym(ctx, is_reader_macro_sym=True)
assert isinstance(s, sym.Symbol)
if s.ns is None and s.name == "b":
return _read_byte_str(ctx)
if s.ns is None:
if s.name == "b":
return _read_byte_str(ctx)
elif s.name == "f":
return _read_fstr(ctx)

v = _read_next_consuming_comment(ctx)

Expand Down Expand Up @@ -1724,10 +1775,7 @@ def _read_next_consuming_comment(ctx: ReaderContext) -> RawReaderForm:

def _read_next_consuming_whitespace(ctx: ReaderContext) -> LispReaderForm:
    """Read the next full form from the input stream, consuming any whitespace.

    Leading whitespace is skipped with `_consume_whitespace`, after which the
    form is read (and returned) by `_read_next`."""
    # The shared helper replaces the hand-rolled peek/next_char loop that used
    # to live here; running both was redundant since the second pass is a no-op.
    _consume_whitespace(ctx)
    return _read_next(ctx)


Expand Down
85 changes: 85 additions & 0 deletions tests/basilisp/reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,91 @@ def test_missing_terminating_quote(self):
read_str_first('"Start of a string')


class TestFormatString:
    """Reader tests for f-string literals (``#f "..."``)."""

    def test_must_include_quote(self):
        # `#f` must be followed by a quoted string, not an arbitrary form.
        with pytest.raises(reader.SyntaxError):
            read_str_first(r"#f []")

    @pytest.mark.parametrize(
        "v,raw",
        [
            ("", '#f ""'),
            ('"', r'#f "\""'),
            ("\\", r'#f "\\"'),
            ("\a", r'#f "\a"'),
            ("\b", r'#f "\b"'),
            ("\f", r'#f "\f"'),
            ("\n", r'#f "\n"'),
            ("\r", r'#f "\r"'),
            ("\t", r'#f "\t"'),
            ("\v", r'#f "\v"'),
            ("Hello,\nmy name is\tChris.", r'#f "Hello,\nmy name is\tChris."'),
            ("Regular string", '#f "Regular string"'),
            ("String with 'inner string'", "#f \"String with 'inner string'\""),
            ('String with "inner string"', r'#f "String with \"inner string\""'),
        ],
    )
    def test_legal_string_is_legal_fstring(self, v: str, raw: str):
        # Without interpolations an f-string reads as an ordinary string.
        assert v == read_str_first(raw)

    # Each input is exactly one well-formed f-string literal. Expected values
    # are either a `(basilisp.core/str ...)` list form (when the literal has an
    # interpolation) or a plain string (when it does not).
    @pytest.mark.parametrize(
        "v,raw",
        [
            (
                llist.l(
                    reader._STR, "[", kw.keyword("whitespace", ns="surrounded.by"), "]"
                ),
                '#f "[{ :surrounded.by/whitespace }]"',
            ),
            (llist.l(reader._STR, "[", None, "]"), '#f "[{nil}]"'),
            (llist.l(reader._STR, "[", True, "]"), '#f "[{true}]"'),
            (llist.l(reader._STR, "[", False, "]"), '#f "[{false}]"'),
            (llist.l(reader._STR, "[", 0, "]"), '#f "[{0}]"'),
            (llist.l(reader._STR, "[", 0.1, "]"), '#f "[{0.1}]"'),
            (llist.l(reader._STR, "[", kw.keyword("a"), "]"), '#f "[{:a}]"'),
            (llist.l(reader._STR, "[", sym.symbol("sym"), "]"), '#f "[{sym}]"'),
            (
                llist.l(
                    reader._STR, "[", llist.l(reader._QUOTE, sym.symbol("sym")), "]"
                ),
                '#f "[{\'sym}]"',
            ),
            (llist.l(reader._STR, "[", vec.EMPTY, "]"), '#f "[{[]}]"'),
            (llist.l(reader._STR, "[", vec.v("string"), "]"), '#f "[{["string"]}]"'),
            (llist.l(reader._STR, "[", llist.EMPTY, "]"), '#f "[{()}]"'),
            (llist.l(reader._STR, "[", llist.l("string"), "]"), '#f "[{("string")}]"'),
            (llist.l(reader._STR, "[", lset.EMPTY, "]"), '#f "[{#{}}]"'),
            (llist.l(reader._STR, "[", lset.s("string"), "]"), '#f "[{#{"string"}}]"'),
            (llist.l(reader._STR, "[", lmap.EMPTY, "]"), '#f "[{{}}]"'),
            (
                llist.l(reader._STR, "[", lmap.map({kw.keyword("a"): "string"}), "]"),
                '#f "[{{:a "string"}}]"',
            ),
            ("{}", r'#f "\{}"'),
            ("{(inc 1)}", r'#f "\{(inc 1)}"'),
            ("[inner]", '#f "[{"inner"}]"'),
        ],
    )
    def test_legal_fstring(self, v: object, raw: str):
        assert v == read_str_first(raw)

    def test_only_one_expr_allowed(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first(r'#f "one {(+ 1 2) :a} three"')

    def test_invalid_escape(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first(r'#f "\q"')

    def test_missing_expression(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first('#f "some val {} with no expr"')

    def test_missing_terminating_quote(self):
        with pytest.raises(reader.SyntaxError):
            read_str_first('#f "Start of a format string')


class TestByteString:
def test_must_include_quote(self):
with pytest.raises(reader.SyntaxError):
Expand Down