basilisp-lang · chrisrink10 · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  * Added the `basilisp.url` namespace for structured URL manipulation (#1239)
  * Added support for proxies (#425)
  * Added a `:slots` meta flag for `deftype` to disable creation of `__slots__` on created types (#1241)
+ * Added support for f-strings (#922)
 
 ### Changed
  * Removed implicit support for single-use iterables in sequences, and introduced `iterator-seq` to expliciltly handle them (#1192)

diff --git a/docs/reader.rst b/docs/reader.rst
@@ -155,6 +155,28 @@ Their meanings match the equivalent escape sequences supported in `Python string
 
    :ref:`strings_and_byte_strings`
 
+.. _reader_f_strings:
+
+f-strings
+^^^^^^^^^
+
+::
+
+    basilisp.user=> #f ""
+    ""
+    basilisp.user=> (let [a 1] #f "this is a string with {(inc a)}")
+    "this is a string with 2"
+    basilisp.user=> (let [a 1] #f "this is a string with \{(inc a)}")
+    "this is a string with {(inc a)}"
+
+f-strings are denoted as a series of characters enclosed by ``"`` quotation marks and preceded by a ``#f``.
+Expressions enclosed in ``{}`` within the string are interpolated into the resulting string.
+Each interpolation must contain exactly one expression, which may be surrounded by optional whitespace characters; that whitespace is not included in the final string.
+Any valid expression may appear in a string interpolation, including another string.
+To include a literal opening ``{`` character, it must be escaped as ``\{``.
+
+f-strings are otherwise identical to standard :ref:`string literals <reader_strings>`.
+
 .. _reader_byte_strings:
 
 Byte Strings

diff --git a/src/basilisp/lang/reader.py b/src/basilisp/lang/reader.py
@@ -105,6 +105,7 @@
 _HASH_SET = sym.symbol("hash-set", ns="basilisp.core")
 _LIST = sym.symbol("list", ns="basilisp.core")
 _SEQ = sym.symbol("seq", ns="basilisp.core")
+_STR = sym.symbol("str", ns="basilisp.core")
 _UNQUOTE = sym.symbol("unquote", ns="basilisp.core")
 _UNQUOTE_SPLICING = sym.symbol("unquote-splicing", ns="basilisp.core")
 _VECTOR = sym.symbol("vector", ns="basilisp.core")
@@ -599,6 +600,14 @@ def with_lineno_and_col(ctx, **kwargs):
     return cast(W, with_lineno_and_col)
 
 
+def _consume_whitespace(ctx: ReaderContext) -> str:
+    """Advance the reader of `ctx` for as long as it reports whitespace characters.
+
+    Return the first character which does not match `whitespace_chars`, i.e. the
+    value returned by the final `peek()` / `next_char()` call on the reader."""
+    stream = ctx.reader
+    current = stream.peek()
+    while True:
+        if not whitespace_chars.match(current):
+            return current
+        current = stream.next_char()
+
+
 def _read_namespaced(
     ctx: ReaderContext, allowed_suffix: Optional[str] = None
 ) -> tuple[Optional[str], str]:
@@ -831,9 +840,7 @@ def _read_namespaced_map(ctx: ReaderContext) -> lmap.PersistentMap:
                 "be specified as keywords without namespaces"
             )
 
-    char = ctx.reader.peek()
-    while whitespace_chars.match(char):
-        char = ctx.reader.next_char()
+    _consume_whitespace(ctx)
 
     return _read_map(ctx, namespace=map_ns)
 
@@ -966,6 +973,49 @@ def _read_str(ctx: ReaderContext, allow_arbitrary_escapes: bool = False) -> str:
         s.append(char)
 
 
+def _read_fstr(ctx: ReaderContext) -> Union[str, llist.PersistentList]:
+    """Read an f-string (``#f "..."``) from the input stream.
+
+    Literal text accepts the escape sequences in `_STR_ESCAPE_CHARS` plus a
+    backslash-escaped ``{`` for a literal opening brace. An unescaped ``{`` starts
+    an interpolation, which must contain exactly one form followed by optional
+    whitespace and a closing ``}``.
+
+    If every element read is a string, return the elements joined as a single
+    `str`. Otherwise return the list `(basilisp.core/str elem ...)` holding the
+    literal segments and interpolated forms in the order they were read.
+
+    Raise a syntax error if the opening quote is missing, if an escape sequence is
+    unknown, or if an interpolation is not closed by ``}`` after a single form.
+    Raise an EOF error if the input ends before the closing quote."""
+    elems: list[LispReaderForm] = []
+    s: list[str] = []
+    reader = ctx.reader
+
+    # As with byte strings, the tag must be followed by an opening quote. Without
+    # this check, input such as `#f [abc"` would be silently read as "abc".
+    char = _consume_whitespace(ctx)
+    if char == "":
+        raise ctx.eof_error("Unexpected EOF in string")
+    if char != '"':
+        raise ctx.syntax_error(f"Expected '\"'; got '{char}' instead")
+
+    while True:
+        char = reader.next_char()
+        if char == "":
+            raise ctx.eof_error("Unexpected EOF in string")
+        if char == "\\":
+            char = reader.next_char()
+            escape_char = _STR_ESCAPE_CHARS.get(char, None)
+            if escape_char:
+                s.append(escape_char)
+                continue
+            if char == "{":
+                # An escaped brace is literal text rather than an interpolation.
+                s.append(char)
+                continue
+            raise ctx.syntax_error(f"Unknown escape sequence: \\{char}")
+        if char == '"':
+            # Step past the closing quote, then flush the trailing literal text.
+            reader.next_char()
+            elems.append("".join(s))
+            if all(isinstance(elem, str) for elem in elems):
+                return "".join(cast(list[str], elems))
+            return llist.list([_STR, *elems])
+        if char == "{":
+            # Step past the opening brace and flush the literal text read so far
+            # before reading the interpolated form.
+            reader.next_char()
+            elems.append("".join(s))
+            s = []
+            expr = _read_next(ctx)
+            elems.append(expr)
+            char = _consume_whitespace(ctx)
+            if char != "}":
+                raise ctx.syntax_error("Expected single expression in f-string")
+            continue
+
+        s.append(char)
+
+
 _BYTES_ESCAPE_CHARS = {
     '"': b'"',
     "\\": b"\\",
@@ -1000,9 +1050,7 @@ def _read_byte_str(ctx: ReaderContext) -> bytes:
     """
     reader = ctx.reader
 
-    char = reader.peek()
-    while whitespace_chars.match(char):
-        char = reader.next_char()
+    char = _consume_whitespace(ctx)
 
     if char != '"':
         raise ctx.syntax_error(f"Expected '\"'; got '{char}' instead")
@@ -1681,8 +1729,11 @@ def _read_reader_macro(ctx: ReaderContext) -> LispReaderForm:
     elif ns_name_chars.match(char):
         s = _read_sym(ctx, is_reader_macro_sym=True)
         assert isinstance(s, sym.Symbol)
-        if s.ns is None and s.name == "b":
-            return _read_byte_str(ctx)
+        if s.ns is None:
+            if s.name == "b":
+                return _read_byte_str(ctx)
+            elif s.name == "f":
+                return _read_fstr(ctx)
 
         v = _read_next_consuming_comment(ctx)
 
@@ -1724,10 +1775,7 @@ def _read_next_consuming_comment(ctx: ReaderContext) -> RawReaderForm:
 
 def _read_next_consuming_whitespace(ctx: ReaderContext) -> LispReaderForm:
     """Read the next full form from the input stream, consuming any whitespace."""
-    reader = ctx.reader
-    char = reader.peek()
-    while whitespace_chars.match(char):
-        char = reader.next_char()
+    _consume_whitespace(ctx)
     return _read_next(ctx)
 
 

diff --git a/tests/basilisp/reader_test.py b/tests/basilisp/reader_test.py
@@ -746,6 +746,91 @@ def test_missing_terminating_quote(self):
             read_str_first('"Start of a string')
 
 
+class TestFormatString:
+    """Reader tests for ``#f "..."`` format string literals."""
+
+    def test_must_include_quote(self):
+        with pytest.raises(reader.SyntaxError):
+            read_str_first(r"#f []")
+
+    # With no interpolations, an f-string reads as the equivalent plain string.
+    @pytest.mark.parametrize(
+        "v,raw",
+        [
+            ("", '#f ""'),
+            ('"', r'#f "\""'),
+            ("\\", r'#f "\\"'),
+            ("\a", r'#f "\a"'),
+            ("\b", r'#f "\b"'),
+            ("\f", r'#f "\f"'),
+            ("\n", r'#f "\n"'),
+            ("\r", r'#f "\r"'),
+            ("\t", r'#f "\t"'),
+            ("\v", r'#f "\v"'),
+            ("Hello,\nmy name is\tChris.", r'#f "Hello,\nmy name is\tChris."'),
+            ("Regular string", '#f "Regular string"'),
+            ("String with 'inner string'", "#f \"String with 'inner string'\""),
+            ('String with "inner string"', r'#f "String with \"inner string\""'),
+        ],
+    )
+    def test_legal_string_is_legal_fstring(self, v: str, raw: str):
+        assert v == read_str_first(raw)
+
+    # Interpolated forms read as a `(basilisp.core/str ...)` list, unless every
+    # element is a string, in which case the elements are joined at read time.
+    # Each input is exactly one well-formed f-string with no trailing characters.
+    @pytest.mark.parametrize(
+        "v,raw",
+        [
+            (
+                llist.l(
+                    reader._STR, "[", kw.keyword("whitespace", ns="surrounded.by"), "]"
+                ),
+                '#f "[{  :surrounded.by/whitespace   }]"',
+            ),
+            (llist.l(reader._STR, "[", None, "]"), '#f "[{nil}]"'),
+            (llist.l(reader._STR, "[", True, "]"), '#f "[{true}]"'),
+            (llist.l(reader._STR, "[", False, "]"), '#f "[{false}]"'),
+            (llist.l(reader._STR, "[", 0, "]"), '#f "[{0}]"'),
+            (llist.l(reader._STR, "[", 0.1, "]"), '#f "[{0.1}]"'),
+            (llist.l(reader._STR, "[", kw.keyword("a"), "]"), '#f "[{:a}]"'),
+            (llist.l(reader._STR, "[", sym.symbol("sym"), "]"), '#f "[{sym}]"'),
+            (
+                llist.l(
+                    reader._STR, "[", llist.l(reader._QUOTE, sym.symbol("sym")), "]"
+                ),
+                '#f "[{\'sym}]"',
+            ),
+            (llist.l(reader._STR, "[", vec.EMPTY, "]"), '#f "[{[]}]"'),
+            (llist.l(reader._STR, "[", vec.v("string"), "]"), '#f "[{["string"]}]"'),
+            (llist.l(reader._STR, "[", llist.EMPTY, "]"), '#f "[{()}]"'),
+            (llist.l(reader._STR, "[", llist.l("string"), "]"), '#f "[{("string")}]"'),
+            (llist.l(reader._STR, "[", lset.EMPTY, "]"), '#f "[{#{}}]"'),
+            (llist.l(reader._STR, "[", lset.s("string"), "]"), '#f "[{#{"string"}}]"'),
+            (llist.l(reader._STR, "[", lmap.EMPTY, "]"), '#f "[{{}}]"'),
+            (
+                llist.l(reader._STR, "[", lmap.map({kw.keyword("a"): "string"}), "]"),
+                '#f "[{{:a "string"}}]"',
+            ),
+            ("{}", r'#f "\{}"'),
+            ("{(inc 1)}", r'#f "\{(inc 1)}"'),
+            ("[inner]", '#f "[{"inner"}]"'),
+        ],
+    )
+    def test_legal_fstring(self, v, raw: str):
+        assert v == read_str_first(raw)
+
+    def test_only_one_expr_allowed(self):
+        with pytest.raises(reader.SyntaxError):
+            read_str_first(r'#f "one {(+ 1 2) :a} three"')
+
+    def test_invalid_escape(self):
+        with pytest.raises(reader.SyntaxError):
+            read_str_first(r'#f "\q"')
+
+    def test_missing_expression(self):
+        with pytest.raises(reader.SyntaxError):
+            read_str_first('#f "some val {} with no expr"')
+
+    def test_missing_terminating_quote(self):
+        with pytest.raises(reader.SyntaxError):
+            read_str_first('#f "Start of a format string')
+
+
 class TestByteString:
     def test_must_include_quote(self):
         with pytest.raises(reader.SyntaxError):