Skip to content

Commit f0d7398

Browse files
fix: Quote string values in YAML frontmatter to handle special characters (#418)
Signed-off-by: phernandez <[email protected]> Signed-off-by: Paul Hernandez <[email protected]> Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com> Co-authored-by: Paul Hernandez <[email protected]>
1 parent 581b7b1 commit f0d7398

File tree

3 files changed

+109
-76
lines changed

3 files changed

+109
-76
lines changed

src/basic_memory/file_utils.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,18 +177,22 @@ def dump_frontmatter(post: frontmatter.Post) -> str:
177177
"""
178178
Serialize frontmatter.Post to markdown with Obsidian-compatible YAML format.
179179
180-
This function ensures that tags are formatted as YAML lists instead of JSON arrays:
180+
This function ensures that:
181+
1. Tags are formatted as YAML lists instead of JSON arrays
182+
2. String values containing special characters (colons, etc.) are quoted so the YAML stays parseable
181183
182184
Good (Obsidian compatible):
183185
---
186+
title: 'L2 Governance Core (Split: Core)'
184187
tags:
185188
- system
186189
- overview
187190
- reference
188191
---
189192
190-
Bad (current behavior):
193+
Bad (causes parsing errors):
191194
---
195+
title: L2 Governance Core (Split: Core) # Unquoted colon breaks YAML
192196
tags: ["system", "overview", "reference"]
193197
---
194198
@@ -203,8 +207,13 @@ def dump_frontmatter(post: frontmatter.Post) -> str:
203207
return post.content
204208

205209
# Serialize YAML with block style for lists
210+
# SafeDumper automatically quotes values with special characters (colons, etc.)
206211
yaml_str = yaml.dump(
207-
post.metadata, sort_keys=False, allow_unicode=True, default_flow_style=False
212+
post.metadata,
213+
sort_keys=False,
214+
allow_unicode=True,
215+
default_flow_style=False,
216+
Dumper=yaml.SafeDumper
208217
)
209218

210219
# Construct the final markdown with frontmatter

tests/sync/test_sync_service.py

Lines changed: 0 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,79 +1651,6 @@ async def mock_sync_markdown_file(path, new):
16511651
assert entity is not None
16521652

16531653

1654-
@pytest.mark.asyncio
1655-
@pytest.mark.skip("flaky on ci tests")
1656-
async def test_circuit_breaker_tracks_multiple_files(
1657-
sync_service: SyncService, project_config: ProjectConfig
1658-
):
1659-
"""Test that circuit breaker tracks multiple failing files independently."""
1660-
from unittest.mock import patch
1661-
1662-
project_dir = project_config.home
1663-
1664-
# Create multiple files with valid markdown
1665-
await create_test_file(
1666-
project_dir / "file1.md",
1667-
"""
1668-
---
1669-
type: knowledge
1670-
---
1671-
# File 1
1672-
Content 1
1673-
""",
1674-
)
1675-
await create_test_file(
1676-
project_dir / "file2.md",
1677-
"""
1678-
---
1679-
type: knowledge
1680-
---
1681-
# File 2
1682-
Content 2
1683-
""",
1684-
)
1685-
await create_test_file(
1686-
project_dir / "file3.md",
1687-
"""
1688-
---
1689-
type: knowledge
1690-
---
1691-
# File 3
1692-
Content 3
1693-
""",
1694-
)
1695-
1696-
# Mock to make file1 and file2 fail, but file3 succeed
1697-
original_sync_markdown_file = sync_service.sync_markdown_file
1698-
1699-
async def mock_sync_markdown_file(path, new):
1700-
if "file1.md" in path or "file2.md" in path:
1701-
raise ValueError(f"Failure for {path}")
1702-
# file3 succeeds - use real implementation
1703-
return await original_sync_markdown_file(path, new)
1704-
1705-
with patch.object(sync_service, "sync_markdown_file", side_effect=mock_sync_markdown_file):
1706-
# Fail 3 times for file1 and file2 (file3 succeeds each time)
1707-
await force_full_scan(sync_service)
1708-
await sync_service.sync(project_dir) # Fail count: file1=1, file2=1
1709-
await touch_file(project_dir / "file1.md") # Touch to trigger incremental scan
1710-
await touch_file(project_dir / "file2.md") # Touch to trigger incremental scan
1711-
await force_full_scan(sync_service)
1712-
await sync_service.sync(project_dir) # Fail count: file1=2, file2=2
1713-
await touch_file(project_dir / "file1.md") # Touch to trigger incremental scan
1714-
await touch_file(project_dir / "file2.md") # Touch to trigger incremental scan
1715-
report3 = await sync_service.sync(project_dir) # Fail count: file1=3, file2=3, now skipped
1716-
1717-
# Both files should be skipped on third sync
1718-
assert len(report3.skipped_files) == 2
1719-
skipped_paths = {f.path for f in report3.skipped_files}
1720-
assert "file1.md" in skipped_paths
1721-
assert "file2.md" in skipped_paths
1722-
1723-
# Verify file3 is not in failures dict
1724-
assert "file3.md" not in sync_service._file_failures
1725-
1726-
17271654
@pytest.mark.asyncio
17281655
async def test_circuit_breaker_handles_checksum_computation_failure(
17291656
sync_service: SyncService, project_config: ProjectConfig

tests/utils/test_frontmatter_obsidian_compatible.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,100 @@ def test_roundtrip_compatibility():
181181
assert parsed_post.metadata["title"] == original_post.metadata["title"]
182182
assert parsed_post.metadata["tags"] == original_post.metadata["tags"]
183183
assert parsed_post.metadata["type"] == original_post.metadata["type"]
184+
185+
186+
def test_title_with_colon():
187+
"""Test that titles with colons are properly quoted and don't break YAML parsing."""
188+
post = frontmatter.Post("Test content")
189+
post.metadata["title"] = "L2 Governance Core (Split: Core)"
190+
post.metadata["type"] = "note"
191+
192+
result = dump_frontmatter(post)
193+
194+
# PyYAML uses single quotes for values with special characters
195+
assert "title: 'L2 Governance Core (Split: Core)'" in result
196+
197+
# Should be parseable back
198+
parsed_post = frontmatter.loads(result)
199+
assert parsed_post.metadata["title"] == "L2 Governance Core (Split: Core)"
200+
201+
202+
def test_title_starting_with_word_and_colon():
203+
"""Test that titles starting with a word and a colon are properly quoted."""
204+
post = frontmatter.Post("Test content")
205+
post.metadata["title"] = "Governance: Rootkeeper Manifest-Diff Prompt"
206+
post.metadata["type"] = "note"
207+
208+
result = dump_frontmatter(post)
209+
210+
# PyYAML auto-quotes values with colons (uses single quotes by default)
211+
assert "title: 'Governance: Rootkeeper Manifest-Diff Prompt'" in result
212+
213+
# Should be parseable back
214+
parsed_post = frontmatter.loads(result)
215+
assert parsed_post.metadata["title"] == "Governance: Rootkeeper Manifest-Diff Prompt"
216+
217+
218+
def test_multiple_colons_in_title():
219+
"""Test that titles with multiple colons are properly quoted."""
220+
post = frontmatter.Post("Test content")
221+
post.metadata["title"] = "API: HTTP: Response Codes: Overview"
222+
post.metadata["type"] = "note"
223+
224+
result = dump_frontmatter(post)
225+
226+
# PyYAML auto-quotes values with colons
227+
assert "title: 'API: HTTP: Response Codes: Overview'" in result
228+
229+
# Should be parseable back
230+
parsed_post = frontmatter.loads(result)
231+
assert parsed_post.metadata["title"] == "API: HTTP: Response Codes: Overview"
232+
233+
234+
def test_other_special_characters_in_title():
235+
"""Test that titles with other special YAML characters survive a dump/parse round trip."""
236+
special_chars_titles = [
237+
"Title with @ symbol",
238+
"Title with # hashtag",
239+
"Title with & ampersand",
240+
"Title with * asterisk",
241+
"Title [with brackets]",
242+
"Title {with braces}",
243+
"Title with | pipe",
244+
"Title with > greater",
245+
]
246+
247+
for title in special_chars_titles:
248+
post = frontmatter.Post("Test content")
249+
post.metadata["title"] = title
250+
post.metadata["type"] = "note"
251+
252+
result = dump_frontmatter(post)
253+
254+
# Should be parseable without errors
255+
parsed_post = frontmatter.loads(result)
256+
assert parsed_post.metadata["title"] == title
257+
258+
259+
def test_all_string_values_quoted():
260+
"""Test that string values with special characters are automatically quoted."""
261+
post = frontmatter.Post("Test content")
262+
post.metadata["title"] = "Test: Title"
263+
post.metadata["permalink"] = "test-permalink"
264+
post.metadata["type"] = "note"
265+
post.metadata["custom_field"] = "value: with colon"
266+
267+
result = dump_frontmatter(post)
268+
269+
# PyYAML auto-quotes values with special chars, leaves simple values unquoted
270+
assert "title: 'Test: Title'" in result # Has colon, gets quoted
271+
assert "permalink: test-permalink" in result # Simple value, no quotes
272+
assert "type: note" in result # Simple value, no quotes
273+
assert "custom_field: 'value: with colon'" in result # Has colon, gets quoted
274+
275+
# Should be parseable back correctly
276+
parsed_post = frontmatter.loads(result)
277+
assert parsed_post.metadata["title"] == "Test: Title"
278+
assert parsed_post.metadata["permalink"] == "test-permalink"
279+
assert parsed_post.metadata["type"] == "note"
280+
assert parsed_post.metadata["custom_field"] == "value: with colon"

0 commit comments

Comments
 (0)