Add PEP 822 d-string (dedented multiline string) support

claude · claude · commit 467bd3a761e7 · 2026-02-01T04:53:46.000Z
Implements the `d` string prefix that automatically removes common indentation from triple-quoted strings at compile time. Supports all prefix combinations and orderings: d, dr/rd, db/bd, df/fd, dt/td, and three-prefix variants like dfr, rdb, etc. Closes #892 https://claude.ai/code/session_01Lu9bsuRrPN8R1yVJt5T7oj
diff --git a/coconut/compiler/compiler.py b/coconut/compiler/compiler.py
@@ -891,6 +891,10 @@ def bind(cls):
         cls.ellipsis <<= attach(cls.ellipsis_tokens, cls.method("ellipsis_handle"))
         cls.f_string <<= attach(cls.f_string_tokens, cls.method("f_string_handle"))
         cls.t_string <<= attach(cls.t_string_tokens, cls.method("t_string_handle"))
+        cls.d_string <<= attach(cls.d_string_tokens, cls.method("d_string_handle"))
+        cls.db_string <<= attach(cls.db_string_tokens, cls.method("db_string_handle"))
+        cls.df_string <<= attach(cls.df_string_tokens, cls.method("df_string_handle"))
+        cls.dt_string <<= attach(cls.dt_string_tokens, cls.method("dt_string_handle"))
         cls.funcname_typeparams <<= attach(cls.funcname_typeparams_tokens, cls.method("funcname_typeparams_handle"))
 
         # standard handlers of the form name <<= attach(name_ref, method("name_handle"))
@@ -4778,6 +4782,151 @@ def t_string_handle(self, original, loc, tokens):
         """Process Python 3.14 template strings."""
         return self.f_string_handle(original, loc, tokens, is_t=True)
 
+    @staticmethod
+    def _d_string_dedent(text, loc):
+        """Apply PEP 822 dedentation to the source text of a d-string.
+
+        text is everything between the triple quotes and must begin with the
+        newline that follows the opening quotes; that newline is dropped from
+        the result. The margin that gets removed is the longest common prefix
+        (compared character by character) of the leading whitespace of every
+        non-blank line and of the final line, i.e. the closing quotes line.
+        Whitespace-only lines other than the final one become empty lines and
+        do not influence the margin.
+
+        Raises CoconutDeferredSyntaxError at loc if text does not start with a
+        newline."""
+        if not text.startswith("\n"):
+            raise CoconutDeferredSyntaxError("d-string contents must start with a newline after opening quotes", loc)
+        lines = text[1:].split("\n")  # the leading newline is not part of the result
+        last = len(lines) - 1
+
+        # the final line always contributes, so margin is never left as None,
+        # and it is a prefix of every line that is not blanked out below
+        margin = None
+        for i, line in enumerate(lines):
+            if i != last and not line.strip():
+                continue
+            line_indent = line[:len(line) - len(line.lstrip())]
+            if margin is None:
+                margin = line_indent
+            else:
+                common = 0
+                for a, b in zip(margin, line_indent):
+                    if a != b:
+                        break
+                    common += 1
+                margin = margin[:common]
+
+        return "\n".join(
+            "" if i != last and not line.strip() else line[len(margin):]
+            for i, line in enumerate(lines)
+        )
+
+    def d_string_handle(self, original, loc, tokens):
+        """Process PEP 822 d-strings (dedented strings).
+
+        Expects a single token: an optional leading "r" followed by a wrapped
+        str ref. Returns the dedented contents re-wrapped as a multiline
+        string, keeping the "r" if it was present. Raises
+        CoconutDeferredSyntaxError if the string is not triple-quoted or its
+        contents do not start with a newline."""
+        string, = tokens
+
+        # strip raw r
+        raw = string.startswith("r")
+        if raw:
+            string = string[1:]
+
+        # unwrap string ref
+        internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid d string item", string)
+        text, strchar = self.get_ref("str", string[1:-1])
+
+        # must be triple-quoted
+        if len(strchar) == 1:
+            raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc)
+
+        # apply dedentation
+        text = self._d_string_dedent(text, loc)
+
+        return ("r" if raw else "") + self.wrap_str(text, strchar[0], multiline=True)
+
+    def db_string_handle(self, original, loc, tokens):
+        """Process d-strings with a b prefix.
+
+        Accepts an "r" on either side of the "b" in the token and always emits
+        the prefix as "b" or "br" in front of the dedented, re-wrapped string.
+        Raises CoconutDeferredSyntaxError if the string is not triple-quoted
+        or its contents do not start with a newline."""
+        string, = tokens
+
+        # strip raw r and b prefix (r may come before or after b)
+        raw = False
+        if string.startswith("r"):
+            raw = True
+            string = string[1:]
+        if string.startswith(("b", "B")):
+            string = string[1:]
+        if string.startswith("r"):
+            raw = True
+            string = string[1:]
+
+        # unwrap string ref
+        internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid db string item", string)
+        text, strchar = self.get_ref("str", string[1:-1])
+
+        # must be triple-quoted
+        if len(strchar) == 1:
+            raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc)
+
+        # apply dedentation
+        text = self._d_string_dedent(text, loc)
+
+        return "b" + ("r" if raw else "") + self.wrap_str(text, strchar[0], multiline=True)
+
+    def df_string_handle(self, original, loc, tokens):
+        """Process d-string with f prefix."""
+        return self._d_f_string_handle(original, loc, tokens, is_t=False)
+
+    def dt_string_handle(self, original, loc, tokens):
+        """Process d-string with t prefix."""
+        return self._d_f_string_handle(original, loc, tokens, is_t=True)
+
+    def _d_f_string_handle(self, original, loc, tokens, is_t=False):
+        """Process d-string combined with f or t prefix.
+
+        Dedents only the literal parts of the f-string ref, then hands a
+        re-wrapped ref (with the same expressions) to f_string_handle, passing
+        is_t through. Raises CoconutDeferredSyntaxError if the string is not
+        triple-quoted or its contents do not start with a newline."""
+        string, = tokens
+
+        # strip raw r
+        raw = string.startswith("r")
+        if raw:
+            string = string[1:]
+
+        # unwrap f-string ref
+        internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid df string item", string)
+        strchar, string_parts, exprs = self.get_ref("f_str", string[1:-1])
+
+        # must be triple-quoted
+        if len(strchar) == 1:
+            raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc)
+
+        # dedent the literal parts as one text, with a NUL standing in for each
+        # expression; NUL is not whitespace to str.strip/lstrip, so it is never
+        # counted as indentation and a line holding only an expression is not
+        # treated as blank
+        placeholder = "\x00"
+        dedented = self._d_string_dedent(placeholder.join(string_parts), loc)
+        new_parts = dedented.split(placeholder)
+
+        # re-wrap as an f-string ref and delegate to f_string_handle
+        new_ref = self.wrap_f_str(strchar, new_parts, exprs)
+        new_token = ("r" if raw else "") + new_ref
+        return self.f_string_handle(original, loc, [new_token], is_t=is_t)
+
     def decorators_handle(self, loc, tokens):
         """Process decorators."""
         defs = []
diff --git a/coconut/compiler/grammar.py b/coconut/compiler/grammar.py
@@ -935,12 +935,17 @@ class Grammar(object):
         u_string = Forward()
         f_string = Forward()
         t_string = Forward()
+        d_string = Forward()
+        db_string = Forward()
+        df_string = Forward()
+        dt_string = Forward()
 
         bit_b = caseless_literal("b")
         raw_r = caseless_literal("r")
         unicode_u = caseless_literal("u", suppress=True)
         format_f = caseless_literal("f", suppress=True)
         template_t = caseless_literal("t", suppress=True)
+        dedent_d = caseless_literal("d", suppress=True)
 
         string = combine(Optional(raw_r) + string_item)
         # Python 2 only supports br"..." not rb"..."
@@ -949,9 +954,14 @@ class Grammar(object):
         u_string_ref = combine(unicode_u + string_item)
         f_string_tokens = combine((format_f + Optional(raw_r) | raw_r + format_f) + string_item)
         t_string_tokens = combine((template_t + Optional(raw_r) | raw_r + template_t) + string_item)
-        nonbf_string = string | u_string
-        nonb_string = nonbf_string | f_string | t_string
-        any_string = nonb_string | b_string
+        # d-string (PEP 822) variants: d (and b/f/t where listed) is required, r is optional, in any order
+        d_string_tokens = combine(any_len_perm(raw_r, required=(dedent_d,)) + string_item)
+        db_string_tokens = combine(any_len_perm(raw_r, required=(dedent_d, bit_b)) + string_item)
+        df_string_tokens = combine(any_len_perm(raw_r, required=(dedent_d, format_f)) + string_item)
+        dt_string_tokens = combine(any_len_perm(raw_r, required=(dedent_d, template_t)) + string_item)
+        nonbf_string = string | u_string | d_string
+        nonb_string = nonbf_string | f_string | t_string | df_string | dt_string
+        any_string = nonb_string | b_string | db_string
         moduledoc = any_string + newline
         docstring = condense(moduledoc)
 
@@ -1342,10 +1352,10 @@ class Grammar(object):
         )
 
         string_atom = Forward()
-        string_atom_ref = OneOrMore(nonb_string) | OneOrMore(b_string)
-        fixed_len_string_tokens = OneOrMore(nonbf_string) | OneOrMore(b_string)
+        string_atom_ref = OneOrMore(nonb_string) | OneOrMore(b_string | db_string)
+        fixed_len_string_tokens = OneOrMore(nonbf_string) | OneOrMore(b_string | db_string)
         f_string_atom = Forward()
-        f_string_atom_ref = ZeroOrMore(nonbf_string) + f_string + ZeroOrMore(nonb_string)
+        f_string_atom_ref = ZeroOrMore(nonbf_string) + (f_string | df_string | dt_string) + ZeroOrMore(nonb_string)
 
         keyword_atom = any_keyword_in(const_vars)
         passthrough_atom = addspace(OneOrMore(passthrough_item))
@@ -2897,8 +2907,8 @@ class Grammar(object):
             | fixto(end_of_line, "misplaced newline (maybe missing ':')")
         )
 
-        start_f_str_regex = compile_regex(r"\br?[ft]r?$")
-        start_f_str_regex_len = 4
+        start_f_str_regex = compile_regex(r"\b[dr]{0,2}[ft][dr]{0,2}$")
+        start_f_str_regex_len = 5
 
         end_f_str_expr = StartOfStrGrammar(combine(rbrace | colon | bang).leaveWhitespace())
 
diff --git a/coconut/tests/src/cocotest/agnostic/primary_2.coco b/coconut/tests/src/cocotest/agnostic/primary_2.coco
@@ -597,4 +597,37 @@ def primary_test_2() -> bool:
     assert final_outer_fn() == 5
     assert final_nested == 1
 
+    # d-string (PEP 822) tests
+    assert d"""
+        Hello
+        World!
+        """ == "Hello\nWorld!\n"
+    assert d"""
+        Hello
+        World!""" == "Hello\nWorld!"
+    assert d"""
+        Hello
+
+        World!
+        """ == "Hello\n\nWorld!\n"
+    assert d"""
+          Hello
+        World!
+        """ == "  Hello\nWorld!\n"
+    assert d"""
+        Hello
+    """ == "    Hello\n"
+    assert dr"""
+        Hello\n
+        World!
+        """ == "Hello\\n\nWorld!\n"
+    assert db"""
+        Hello
+        World!
+        """ == b"Hello\nWorld!\n"
+    name = "World"
+    assert df"""
+        Hello, {name}!
+        """ == "Hello, World!\n"
+
     return True