1+ """Test that YAML date parsing doesn't break frontmatter processing.
2+
3+ This test reproduces GitHub issue #236 from basic-memory-cloud where date fields
4+ in YAML frontmatter are automatically parsed as datetime.date objects by PyYAML,
5+ but later code expects strings and calls .strip() on them, causing AttributeError.
6+ """
7+
8+ import pytest
9+ from pathlib import Path
10+ from basic_memory .markdown .entity_parser import EntityParser
11+
12+
@pytest.fixture
def test_file_with_date(tmp_path):
    """Write a markdown note whose frontmatter holds unquoted date scalars.

    The ``date`` and ``created`` keys are bare ``YYYY-MM-DD`` values, which is
    the shape PyYAML turns into ``datetime.date`` objects rather than strings.

    Returns:
        Path of the note written under ``tmp_path``.
    """
    note_path = tmp_path / "test_note.md"
    # Built line by line; the pieces concatenate to the exact note text.
    note_body = (
        "---\n"
        "title: Test Note\n"
        "date: 2025-10-24\n"
        "created: 2025-10-24\n"
        "tags:\n"
        "- python\n"
        "- testing\n"
        "---\n"
        "\n"
        "# Test Content\n"
        "\n"
        "This file has date fields in frontmatter that PyYAML will parse as datetime.date objects.\n"
    )
    note_path.write_text(note_body)
    return note_path
32+
33+
@pytest.fixture
def test_file_with_date_in_tags(tmp_path):
    """Write a markdown note whose ``tags`` value is a single bare date.

    Edge case: ``tags`` is a scalar (not a list) and that scalar is an
    unquoted ``YYYY-MM-DD`` value, so PyYAML yields a ``datetime.date``.

    Returns:
        Path of the note written under ``tmp_path``.
    """
    note_path = tmp_path / "test_note_date_tags.md"
    note_body = (
        "---\n"
        "title: Test Note with Date Tags\n"
        "tags: 2025-10-24\n"
        "---\n"
        "\n"
        "# Test Content\n"
        "\n"
        "This file has a date value as tags, which will be parsed as datetime.date.\n"
    )
    note_path.write_text(note_body)
    return note_path
49+
50+
@pytest.fixture
def test_file_with_dates_in_tag_list(tmp_path):
    """Write a markdown note whose tag list mixes plain tags with a bare date.

    Edge case: the ``tags`` sequence contains two ordinary string tags and one
    unquoted ``YYYY-MM-DD`` entry.

    Returns:
        Path of the note written under ``tmp_path``.
    """
    note_path = tmp_path / "test_note_dates_in_list.md"
    note_body = (
        "---\n"
        "title: Test Note with Dates in Tags List\n"
        "tags:\n"
        "- valid-tag\n"
        "- 2025-10-24\n"
        "- another-tag\n"
        "---\n"
        "\n"
        "# Test Content\n"
        "\n"
        "This file has date values mixed into tags list.\n"
    )
    note_path.write_text(note_body)
    return note_path
69+
70+
@pytest.mark.asyncio
async def test_parse_file_with_date_fields(test_file_with_date, tmp_path):
    """Parse a note with bare date scalars and check they surface as strings.

    Parsing must complete without an ``AttributeError`` from ``.strip()``, and
    the ``date`` and ``created`` metadata entries must each be a ``str`` equal
    to ``"2025-10-24"``.
    """
    parsed = await EntityParser(tmp_path).parse_file(test_file_with_date)
    frontmatter = parsed.frontmatter

    # Sanity check: ordinary frontmatter fields are still parsed.
    assert frontmatter.title == "Test Note"

    # Every date-valued key must come back as an ISO-format string.
    for key, label in (("date", "Date"), ("created", "Created date")):
        value = frontmatter.metadata.get(key)
        assert value is not None
        assert isinstance(value, str), f"{label} should be converted to string"
        assert value == "2025-10-24", f"{label} should be in ISO format"
92+
93+
@pytest.mark.asyncio
async def test_parse_file_with_date_as_tags(test_file_with_date_in_tags, tmp_path):
    """Parse a note whose ``tags`` value is a bare date and check the result.

    Parsing must not raise, the title must be intact, and the resulting tag
    list must contain the date rendered as the string ``"2025-10-24"``.
    """
    # Parsing itself is the first check: it must not raise AttributeError.
    parsed = await EntityParser(tmp_path).parse_file(test_file_with_date_in_tags)
    frontmatter = parsed.frontmatter
    assert frontmatter.title == "Test Note with Date Tags"

    # The date scalar should have become a string before tag parsing ran.
    parsed_tags = frontmatter.tags
    assert parsed_tags is not None
    assert isinstance(parsed_tags, list)
    assert "2025-10-24" in parsed_tags
109+
110+
@pytest.mark.asyncio
async def test_parse_file_with_dates_in_tag_list(test_file_with_dates_in_tag_list, tmp_path):
    """Parse a note whose tag list contains a bare date among plain tags.

    Parsing must not raise, and the tag list must hold exactly three entries:
    the two plain tags plus the date rendered as ``"2025-10-24"``.
    """
    # Parsing itself is the first check: it must not raise AttributeError.
    parsed = await EntityParser(tmp_path).parse_file(test_file_with_dates_in_tag_list)
    frontmatter = parsed.frontmatter
    assert frontmatter.title == "Test Note with Dates in Tags List"

    parsed_tags = frontmatter.tags
    assert parsed_tags is not None
    assert isinstance(parsed_tags, list)

    # Two ordinary tags plus one date converted to its ISO string.
    assert len(parsed_tags) == 3
    for expected_tag in ("valid-tag", "another-tag", "2025-10-24"):
        assert expected_tag in parsed_tags
131+
132+
@pytest.mark.asyncio
async def test_parse_file_with_various_yaml_types(tmp_path):
    """Test that various YAML types in frontmatter don't cause errors.

    This reproduces the broader issue from GitHub #236 where ANY non-string
    YAML type (dates, lists, numbers, booleans) can cause AttributeError
    when code expects strings and calls .strip().

    The note written here carries a date, an integer, a boolean, a list and a
    nested mapping. The assertions check that scalars come back as strings,
    that the list and mapping keep their container type, and that the values
    inside those containers are strings.
    """
    test_file = tmp_path / "test_yaml_types.md"
    # ``author`` and ``version`` must be indented beneath ``metadata:``. At the
    # same column they become separate top-level keys and ``metadata`` itself
    # parses as null, which makes the nested-dict assertions below fail.
    content = """---
title: Test YAML Types
date: 2025-10-24
priority: 1
completed: true
tags:
- python
- testing
metadata:
  author: Test User
  version: 1.0
---

# Test Content

This file has various YAML types that need to be normalized.
"""
    test_file.write_text(content)

    parser = EntityParser(tmp_path)
    entity_markdown = await parser.parse_file(test_file)

    # All values should be accessible without AttributeError
    assert entity_markdown.frontmatter.title == "Test YAML Types"

    # Date should be converted to ISO string
    date_field = entity_markdown.frontmatter.metadata.get("date")
    assert isinstance(date_field, str)
    assert date_field == "2025-10-24"

    # Number should be converted to string
    priority = entity_markdown.frontmatter.metadata.get("priority")
    assert isinstance(priority, str)
    assert priority == "1"

    # Boolean should be converted to string
    completed = entity_markdown.frontmatter.metadata.get("completed")
    assert isinstance(completed, str)
    assert completed == "True"  # Python's str(True) always returns "True"

    # List should be preserved as list, but items should be strings
    tags = entity_markdown.frontmatter.tags
    assert isinstance(tags, list)
    assert all(isinstance(tag, str) for tag in tags)
    assert "python" in tags
    assert "testing" in tags

    # Dict should be preserved as dict, but nested values should be strings
    metadata = entity_markdown.frontmatter.metadata.get("metadata")
    assert isinstance(metadata, dict)
    assert isinstance(metadata.get("author"), str)
    assert metadata.get("author") == "Test User"
    assert isinstance(metadata.get("version"), str)
    # Either rendering of the YAML float 1.0 is accepted.
    assert metadata.get("version") in ("1.0", "1")
196+
197+
@pytest.mark.asyncio
async def test_parse_file_with_datetime_objects(tmp_path):
    """Check that datetime values with a time component come back as strings.

    The note's frontmatter holds two timestamps, one in ``YYYY-MM-DD HH:MM:SS``
    form and one in ``YYYY-MM-DDTHH:MM:SS`` form, both of which PyYAML parses
    as datetime objects. After parsing, each metadata value must be a ``str``
    that still contains its date part and its time part.
    """
    note_path = tmp_path / "test_datetime.md"
    note_path.write_text(
        "---\n"
        "title: Test Datetime\n"
        "created_at: 2025-10-24 14:30:00\n"
        "updated_at: 2025-10-24T00:00:00\n"
        "---\n"
        "\n"
        "# Test Content\n"
        "\n"
        "This file has datetime values in frontmatter that PyYAML will parse as datetime objects.\n"
    )

    parsed = await EntityParser(tmp_path).parse_file(note_path)
    metadata = parsed.frontmatter.metadata

    # Space-separated timestamp: date and time must both survive normalization.
    created_at = metadata.get("created_at")
    assert isinstance(created_at, str), "Datetime should be converted to string"
    assert all(part in created_at for part in ("2025-10-24", "14:30:00")), (
        f"Datetime with time should be normalized to ISO format, got: {created_at} "
    )

    # "T"-separated midnight timestamp: the zero time must not be dropped.
    updated_at = metadata.get("updated_at")
    assert isinstance(updated_at, str), "Datetime should be converted to string"
    assert all(part in updated_at for part in ("2025-10-24", "00:00:00")), (
        f"Datetime at midnight should be normalized to ISO format, got: {updated_at} "
    )
0 commit comments