Skip to content

Commit 467bd3a

Browse files
committed
Add PEP 822 d-string (dedented multiline string) support
Implements the `d` string prefix that automatically removes common indentation from triple-quoted strings at compile time. Supports all prefix combinations and orderings: d, dr/rd, db/bd, df/fd, dt/td, and three-prefix variants like dfr, rdb, etc. Closes #892 https://claude.ai/code/session_01Lu9bsuRrPN8R1yVJt5T7oj
1 parent abbb6dc commit 467bd3a

File tree

3 files changed

+236
-8
lines changed

3 files changed

+236
-8
lines changed

coconut/compiler/compiler.py

Lines changed: 185 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -891,6 +891,10 @@ def bind(cls):
891891
cls.ellipsis <<= attach(cls.ellipsis_tokens, cls.method("ellipsis_handle"))
892892
cls.f_string <<= attach(cls.f_string_tokens, cls.method("f_string_handle"))
893893
cls.t_string <<= attach(cls.t_string_tokens, cls.method("t_string_handle"))
894+
cls.d_string <<= attach(cls.d_string_tokens, cls.method("d_string_handle"))
895+
cls.db_string <<= attach(cls.db_string_tokens, cls.method("db_string_handle"))
896+
cls.df_string <<= attach(cls.df_string_tokens, cls.method("df_string_handle"))
897+
cls.dt_string <<= attach(cls.dt_string_tokens, cls.method("dt_string_handle"))
894898
cls.funcname_typeparams <<= attach(cls.funcname_typeparams_tokens, cls.method("funcname_typeparams_handle"))
895899

896900
# standard handlers of the form name <<= attach(name_ref, method("name_handle"))
@@ -4778,6 +4782,187 @@ def t_string_handle(self, original, loc, tokens):
47784782
"""Process Python 3.14 template strings."""
47794783
return self.f_string_handle(original, loc, tokens, is_t=True)
47804784

4785+
@staticmethod
def _d_string_dedent(text, loc):
    """Apply PEP 822 dedentation to the contents of a d-string.

    text is everything between the triple quotes and must begin with the
    newline that follows the opening quotes; that newline is dropped.
    loc is the parse location attached to any error raised.

    Returns the remaining text with the common leading whitespace removed
    from every line. Whitespace-only lines are ignored when computing the
    common indentation and come out empty, except for the final line (the
    one holding the closing quotes), which always participates.

    Raises CoconutDeferredSyntaxError if text does not start with a newline.
    """
    if not text.startswith("\n"):
        raise CoconutDeferredSyntaxError("d-string contents must start with a newline after opening quotes", loc)
    lines = text[1:].split("\n")
    last = len(lines) - 1

    def counts(index, line):
        # blank lines are ignored except the last line (closing quotes line)
        return index == last or line.strip() != ""

    # the common indentation is the longest shared prefix of the leading
    # whitespace of every line that counts; split() always yields at least
    # one line and the last line always counts, so indent is always set
    indent = None
    for index, line in enumerate(lines):
        if not counts(index, line):
            continue
        line_indent = line[:len(line) - len(line.lstrip())]
        if indent is None:
            indent = line_indent
            continue
        shared = 0
        for a, b in zip(indent, line_indent):
            if a != b:
                break
            shared += 1
        indent = indent[:shared]

    # every line that counts starts with indent by construction, so slicing
    # is always valid and no inconsistent-indentation case can arise here
    width = len(indent)
    return "\n".join(
        line[width:] if counts(index, line) else ""
        for index, line in enumerate(lines)
    )
4833+
def d_string_handle(self, original, loc, tokens):
    """Dedent a plain or raw PEP 822 d-string and re-wrap it as a string ref."""
    string, = tokens

    # a leading r marks a raw string; remember it and peel it off
    prefix = ""
    if string.startswith("r"):
        prefix, string = "r", string[1:]

    # what is left must be a wrapped string reference
    internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid d string item", string)
    contents, quotes = self.get_ref("str", string[1:-1])

    # a one-character quote marker is rejected as not triple-quoted
    if len(quotes) == 1:
        raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc)

    dedented = self._d_string_dedent(contents, loc)
    return prefix + self.wrap_str(dedented, quotes[0], multiline=True)
4855+
def db_string_handle(self, original, loc, tokens):
    """Dedent a bytes d-string (d combined with b, optionally r) and re-wrap it."""
    string, = tokens

    # peel off, in order, an optional r, an optional b/B and another optional r;
    # seeing r in either position makes the result raw
    raw = False
    for options in ("r", ("b", "B"), "r"):
        if string.startswith(options):
            string = string[1:]
            raw = raw or options == "r"

    # what is left must be a wrapped string reference
    internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid db string item", string)
    contents, quotes = self.get_ref("str", string[1:-1])

    # a one-character quote marker is rejected as not triple-quoted
    if len(quotes) == 1:
        raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc)

    dedented = self._d_string_dedent(contents, loc)
    raw_prefix = "r" if raw else ""
    return "b" + raw_prefix + self.wrap_str(dedented, quotes[0], multiline=True)
4884+
def df_string_handle(self, original, loc, tokens):
    """Handle a dedented format string (d prefix combined with f)."""
    template = False
    return self._d_f_string_handle(original, loc, tokens, is_t=template)
4888+
def dt_string_handle(self, original, loc, tokens):
    """Handle a dedented template string (d prefix combined with t)."""
    template = True
    return self._d_f_string_handle(original, loc, tokens, is_t=template)
4892+
def _d_f_string_handle(self, original, loc, tokens, is_t=False):
    """Process a d-string combined with an f or t prefix.

    The literal parts of the stored f-string are dedented as one text and
    the result is re-wrapped and passed to f_string_handle, with is_t
    forwarded unchanged.

    Raises CoconutDeferredSyntaxError if the string is not triple-quoted or
    if its contents do not start with a newline.
    """
    string, = tokens

    # strip raw r
    raw = string.startswith("r")
    if raw:
        string = string[1:]

    # unwrap f-string ref
    internal_assert(string.startswith(strwrapper) and string.endswith(unwrapper), "invalid df string item", string)
    strchar, string_parts, exprs = self.get_ref("f_str", string[1:-1])

    # must be triple-quoted
    if len(strchar) == 1:
        raise CoconutDeferredSyntaxError("d-string prefix requires triple-quoted string", loc)

    # join the literal parts with a placeholder standing in for each
    # expression; the placeholder is not whitespace, so the shared dedent
    # routine treats it as ordinary line content
    placeholder = "\x00"
    dedented = self._d_string_dedent(placeholder.join(string_parts), loc)
    new_parts = dedented.split(placeholder)

    # the parts must still line up one-to-one with the original parts
    # (this would fail if a literal part itself contained the placeholder)
    internal_assert(len(new_parts) == len(string_parts), "d-string dedent changed the number of f-string parts", (string_parts, new_parts))

    # re-wrap as an f-string ref and delegate to f_string_handle
    new_ref = self.wrap_f_str(strchar, new_parts, exprs)
    new_token = ("r" if raw else "") + new_ref
    return self.f_string_handle(original, loc, [new_token], is_t=is_t)
47814966
def decorators_handle(self, loc, tokens):
47824967
"""Process decorators."""
47834968
defs = []

coconut/compiler/grammar.py

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -935,12 +935,17 @@ class Grammar(object):
935935
u_string = Forward()
936936
f_string = Forward()
937937
t_string = Forward()
938+
d_string = Forward()
939+
db_string = Forward()
940+
df_string = Forward()
941+
dt_string = Forward()
938942

939943
bit_b = caseless_literal("b")
940944
raw_r = caseless_literal("r")
941945
unicode_u = caseless_literal("u", suppress=True)
942946
format_f = caseless_literal("f", suppress=True)
943947
template_t = caseless_literal("t", suppress=True)
948+
dedent_d = caseless_literal("d", suppress=True)
944949

945950
string = combine(Optional(raw_r) + string_item)
946951
# Python 2 only supports br"..." not rb"..."
@@ -949,9 +954,14 @@ class Grammar(object):
949954
u_string_ref = combine(unicode_u + string_item)
950955
f_string_tokens = combine((format_f + Optional(raw_r) | raw_r + format_f) + string_item)
951956
t_string_tokens = combine((template_t + Optional(raw_r) | raw_r + template_t) + string_item)
952-
nonbf_string = string | u_string
953-
nonb_string = nonbf_string | f_string | t_string
954-
any_string = nonb_string | b_string
957+
# d-string (PEP 822) dedented string variants
958+
d_string_tokens = combine(any_len_perm(raw_r, required=(dedent_d,)) + string_item)
959+
db_string_tokens = combine(any_len_perm(raw_r, required=(dedent_d, bit_b)) + string_item)
960+
df_string_tokens = combine(any_len_perm(raw_r, required=(dedent_d, format_f)) + string_item)
961+
dt_string_tokens = combine(any_len_perm(raw_r, required=(dedent_d, template_t)) + string_item)
962+
nonbf_string = string | u_string | d_string
963+
nonb_string = nonbf_string | f_string | t_string | df_string | dt_string
964+
any_string = nonb_string | b_string | db_string
955965
moduledoc = any_string + newline
956966
docstring = condense(moduledoc)
957967

@@ -1342,10 +1352,10 @@ class Grammar(object):
13421352
)
13431353

13441354
string_atom = Forward()
1345-
string_atom_ref = OneOrMore(nonb_string) | OneOrMore(b_string)
1346-
fixed_len_string_tokens = OneOrMore(nonbf_string) | OneOrMore(b_string)
1355+
string_atom_ref = OneOrMore(nonb_string) | OneOrMore(b_string | db_string)
1356+
fixed_len_string_tokens = OneOrMore(nonbf_string) | OneOrMore(b_string | db_string)
13471357
f_string_atom = Forward()
1348-
f_string_atom_ref = ZeroOrMore(nonbf_string) + f_string + ZeroOrMore(nonb_string)
1358+
f_string_atom_ref = ZeroOrMore(nonbf_string) + (f_string | df_string | dt_string) + ZeroOrMore(nonb_string)
13491359

13501360
keyword_atom = any_keyword_in(const_vars)
13511361
passthrough_atom = addspace(OneOrMore(passthrough_item))
@@ -2897,8 +2907,8 @@ class Grammar(object):
28972907
| fixto(end_of_line, "misplaced newline (maybe missing ':')")
28982908
)
28992909

2900-
start_f_str_regex = compile_regex(r"\br?[ft]r?$")
2901-
start_f_str_regex_len = 4
2910+
start_f_str_regex = compile_regex(r"\b[dr]{0,2}[ft][dr]{0,2}$")
2911+
start_f_str_regex_len = 5
29022912

29032913
end_f_str_expr = StartOfStrGrammar(combine(rbrace | colon | bang).leaveWhitespace())
29042914

coconut/tests/src/cocotest/agnostic/primary_2.coco

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -597,4 +597,37 @@ def primary_test_2() -> bool:
597597
assert final_outer_fn() == 5
598598
assert final_nested == 1
599599

600+
# d-string (PEP 822) tests
601+
assert d"""
602+
Hello
603+
World!
604+
""" == "Hello\nWorld!\n"
605+
assert d"""
606+
Hello
607+
World!""" == "Hello\nWorld!"
608+
assert d"""
609+
Hello
610+
611+
World!
612+
""" == "Hello\n\nWorld!\n"
613+
assert d"""
614+
Hello
615+
World!
616+
""" == " Hello\nWorld!\n"
617+
assert d"""
618+
Hello
619+
""" == " Hello\n"
620+
assert dr"""
621+
Hello\n
622+
World!
623+
""" == "Hello\\n\nWorld!\n"
624+
assert db"""
625+
Hello
626+
World!
627+
""" == b"Hello\nWorld!\n"
628+
name = "World"
629+
assert df"""
630+
Hello, {name}!
631+
""" == "Hello, World!\n"
632+
600633
return True

0 commit comments

Comments
 (0)