diff --git a/CHANGELOG.md b/CHANGELOG.md index b3bbe64c..a03ccd9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Added the `basilisp.url` namespace for structured URL manipulation (#1239) * Added support for proxies (#425) * Added a `:slots` meta flag for `deftype` to disable creation of `__slots__` on created types (#1241) + * Added support for f-strings (#922) ### Changed * Removed implicit support for single-use iterables in sequences, and introduced `iterator-seq` to expliciltly handle them (#1192) diff --git a/docs/reader.rst b/docs/reader.rst index bcdf3107..9295fc96 100644 --- a/docs/reader.rst +++ b/docs/reader.rst @@ -155,6 +155,28 @@ Their meanings match the equivalent escape sequences supported in `Python string :ref:`strings_and_byte_strings` +.. _reader_f_strings: + +f-strings +^^^^^^^^^ + +:: + + basilisp.user=> #f "" + "" + basilisp.user=> (let [a 1] #f "this is a string with {(inc a)}") + "this is a string with 2" + basilisp.user=> (let [a 1] #f "this is a string with \{(inc a)}") + "this is a string with {(inc a)}" + +f-strings are denoted as a series of characters enclosed by ``"`` quotation marks and preceded by a ``#f``. +Expressions may be interpolated in the string enclosed in ``{}``. +Each interpolation must contain exactly 1 expression and may be surrounded by optional whitespace characters which will not be included in the final string. +Any valid expression may appear in a string interpolation, including another string. +To include a literal opening ``{`` character, it must be escaped as ``\{``. + +f-strings are otherwise identical to standard :ref:`string literals <reader_strings>`. + .. 
_reader_byte_strings: Byte Strings diff --git a/src/basilisp/lang/reader.py b/src/basilisp/lang/reader.py index 93834578..2de2d7de 100644 --- a/src/basilisp/lang/reader.py +++ b/src/basilisp/lang/reader.py @@ -105,6 +105,7 @@ _HASH_SET = sym.symbol("hash-set", ns="basilisp.core") _LIST = sym.symbol("list", ns="basilisp.core") _SEQ = sym.symbol("seq", ns="basilisp.core") +_STR = sym.symbol("str", ns="basilisp.core") _UNQUOTE = sym.symbol("unquote", ns="basilisp.core") _UNQUOTE_SPLICING = sym.symbol("unquote-splicing", ns="basilisp.core") _VECTOR = sym.symbol("vector", ns="basilisp.core") @@ -599,6 +600,14 @@ def with_lineno_and_col(ctx, **kwargs): return cast(W, with_lineno_and_col) +def _consume_whitespace(ctx: ReaderContext) -> str: + reader = ctx.reader + char = reader.peek() + while whitespace_chars.match(char): + char = reader.next_char() + return char + + def _read_namespaced( ctx: ReaderContext, allowed_suffix: Optional[str] = None ) -> tuple[Optional[str], str]: @@ -831,9 +840,7 @@ def _read_namespaced_map(ctx: ReaderContext) -> lmap.PersistentMap: "be specified as keywords without namespaces" ) - char = ctx.reader.peek() - while whitespace_chars.match(char): - char = ctx.reader.next_char() + _consume_whitespace(ctx) return _read_map(ctx, namespace=map_ns) @@ -966,6 +973,49 @@ def _read_str(ctx: ReaderContext, allow_arbitrary_escapes: bool = False) -> str: s.append(char) +def _read_fstr(ctx: ReaderContext) -> Union[str, llist.PersistentList]: + """Read an f-string from the input stream, returning a plain string if every segment is a string or a `basilisp.core/str` call form over the segments otherwise.""" + elems: list[LispReaderForm] = [] + s: list[str] = [] + reader = ctx.reader + + _consume_whitespace(ctx) + + while True: + char = reader.next_char() + if char == "": + raise ctx.eof_error("Unexpected EOF in string") + if char == "\\": + char = reader.next_char() + escape_char = _STR_ESCAPE_CHARS.get(char, None) + if escape_char: + s.append(escape_char) + continue + if char == "{": + s.append(char) + continue + raise ctx.syntax_error(f"Unknown escape sequence: 
\\{char}") + if char == '"': + reader.next_char() + elems.append("".join(s)) + if all(isinstance(elem, str) for elem in elems): + return "".join(cast(list[str], elems)) + else: + return llist.list([_STR, *elems]) + if char == "{": + reader.next_char() + elems.append("".join(s)) + s = [] + expr = _read_next(ctx) + elems.append(expr) + char = _consume_whitespace(ctx) + if char != "}": + raise ctx.syntax_error("Expected single expression in f-string") + continue + + s.append(char) + + _BYTES_ESCAPE_CHARS = { '"': b'"', "\\": b"\\", @@ -1000,9 +1050,7 @@ def _read_byte_str(ctx: ReaderContext) -> bytes: """ reader = ctx.reader - char = reader.peek() - while whitespace_chars.match(char): - char = reader.next_char() + char = _consume_whitespace(ctx) if char != '"': raise ctx.syntax_error(f"Expected '\"'; got '{char}' instead") @@ -1681,8 +1729,11 @@ def _read_reader_macro(ctx: ReaderContext) -> LispReaderForm: elif ns_name_chars.match(char): s = _read_sym(ctx, is_reader_macro_sym=True) assert isinstance(s, sym.Symbol) - if s.ns is None and s.name == "b": - return _read_byte_str(ctx) + if s.ns is None: + if s.name == "b": + return _read_byte_str(ctx) + elif s.name == "f": + return _read_fstr(ctx) v = _read_next_consuming_comment(ctx) @@ -1724,10 +1775,7 @@ def _read_next_consuming_comment(ctx: ReaderContext) -> RawReaderForm: def _read_next_consuming_whitespace(ctx: ReaderContext) -> LispReaderForm: """Read the next full form from the input stream, consuming any whitespace.""" - reader = ctx.reader - char = reader.peek() - while whitespace_chars.match(char): - char = reader.next_char() + _consume_whitespace(ctx) return _read_next(ctx) diff --git a/tests/basilisp/reader_test.py b/tests/basilisp/reader_test.py index 72d579f9..fff7b793 100644 --- a/tests/basilisp/reader_test.py +++ b/tests/basilisp/reader_test.py @@ -746,6 +746,91 @@ def test_missing_terminating_quote(self): read_str_first('"Start of a string') +class TestFormatString: + def test_must_include_quote(self): + 
with pytest.raises(reader.SyntaxError): + read_str_first(r"#f []") + + @pytest.mark.parametrize( + "v,raw", + [ + ("", '#f ""'), + ('"', r'#f "\""'), + ("\\", r'#f "\\"'), + ("\a", r'#f "\a"'), + ("\b", r'#f "\b"'), + ("\f", r'#f "\f"'), + ("\n", r'#f "\n"'), + ("\r", r'#f "\r"'), + ("\t", r'#f "\t"'), + ("\v", r'#f "\v"'), + ("Hello,\nmy name is\tChris.", r'#f "Hello,\nmy name is\tChris."'), + ("Regular string", '#f "Regular string"'), + ("String with 'inner string'", "#f \"String with 'inner string'\""), + ('String with "inner string"', r'#f "String with \"inner string\""'), + ], + ) + def test_legal_string_is_legal_fstring(self, v: str, raw: str): + assert v == read_str_first(raw) + + @pytest.mark.parametrize( + "v,raw", + [ + ( + llist.l( + reader._STR, "[", kw.keyword("whitespace", ns="surrounded.by"), "]" + ), + '#f "[{ :surrounded.by/whitespace }]""', + ), + (llist.l(reader._STR, "[", None, "]"), '#f "[{nil}]""'), + (llist.l(reader._STR, "[", True, "]"), '#f "[{true}]""'), + (llist.l(reader._STR, "[", False, "]"), '#f "[{false}]""'), + (llist.l(reader._STR, "[", 0, "]"), '#f "[{0}]""'), + (llist.l(reader._STR, "[", 0.1, "]"), '#f "[{0.1}]""'), + (llist.l(reader._STR, "[", kw.keyword("a"), "]"), '#f "[{:a}]""'), + (llist.l(reader._STR, "[", sym.symbol("sym"), "]"), '#f "[{sym}]""'), + ( + llist.l( + reader._STR, "[", llist.l(reader._QUOTE, sym.symbol("sym")), "]" + ), + '#f "[{\'sym}]""', + ), + (llist.l(reader._STR, "[", vec.EMPTY, "]"), '#f "[{[]}]""'), + (llist.l(reader._STR, "[", vec.v("string"), "]"), '#f "[{["string"]}]""'), + (llist.l(reader._STR, "[", llist.EMPTY, "]"), '#f "[{()}]""'), + (llist.l(reader._STR, "[", llist.l("string"), "]"), '#f "[{("string")}]""'), + (llist.l(reader._STR, "[", lset.EMPTY, "]"), '#f "[{#{}}]""'), + (llist.l(reader._STR, "[", lset.s("string"), "]"), '#f "[{#{"string"}}]""'), + (llist.l(reader._STR, "[", lmap.EMPTY, "]"), '#f "[{{}}]""'), + ( + llist.l(reader._STR, "[", lmap.map({kw.keyword("a"): "string"}), "]"), + '#f 
"[{{:a "string"}}]""', + ), + ("{}", r'#f "\{}""'), + ("{(inc 1)}", r'#f "\{(inc 1)}""'), + ("[inner]", '#f "[{"inner"}]""'), + ], + ) + def test_legal_fstring(self, v: str, raw: str): + assert v == read_str_first(raw) + + def test_only_one_expr_allowed(self): + with pytest.raises(reader.SyntaxError): + read_str_first(r'#f "one {(+ 1 2) :a} three"') + + def test_invalid_escape(self): + with pytest.raises(reader.SyntaxError): + read_str_first(r'#f "\q"') + + def test_missing_expression(self): + with pytest.raises(reader.SyntaxError): + read_str_first('#f "some val {} with no expr"') + + def test_missing_terminating_quote(self): + with pytest.raises(reader.SyntaxError): + read_str_first('#f "Start of a format string') + + class TestByteString: def test_must_include_quote(self): with pytest.raises(reader.SyntaxError):