Skip to content

Commit 61699f8

Browse files
Merge pull request #47 from joaopauloschuler/copilot/test-list-directory-tree-md-files
Fix markdown section extraction to skip content inside code blocks
2 parents b5aad55 + 2385f9d commit 61699f8

File tree

2 files changed

+120
-1
lines changed

2 files changed

+120
-1
lines changed

src/smolagents/bp_tools.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1888,7 +1888,28 @@ def extract_function_signatures(filename: str, language: str = "python") -> str:
18881888

18891889
if language.lower() in ["markdown", "md"]:
18901890
# Extract markdown sections (lines starting with #)
1891-
md_sections = [line.strip() for line in content.split('\n') if line.strip().startswith('#')]
1891+
# Skip lines inside code blocks (fenced with ``` or ~~~)
1892+
md_sections = []
1893+
code_block_delimiter = None # Stores the opening delimiter type
1894+
for line in content.split('\n'):
1895+
stripped = line.strip()
1896+
# Check for code block delimiters (``` or ~~~)
1897+
if code_block_delimiter is None:
1898+
# Not in a code block - check for opening delimiter
1899+
if stripped.startswith('```'):
1900+
code_block_delimiter = '```'
1901+
continue
1902+
elif stripped.startswith('~~~'):
1903+
code_block_delimiter = '~~~'
1904+
continue
1905+
else:
1906+
# Inside a code block - check for matching closing delimiter
1907+
if stripped.startswith(code_block_delimiter):
1908+
code_block_delimiter = None
1909+
continue
1910+
# Only extract headers when not inside a code block
1911+
if code_block_delimiter is None and stripped.startswith('#'):
1912+
md_sections.append(stripped)
18921913
if not md_sections:
18931914
return f"No sections found in '{filename}'"
18941915
return "\n".join(md_sections)

tests/test_bp_context_tools.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,38 @@ def test_function_signatures_with_non_source_files(self, tmp_path):
274274
assert "script.py" in result
275275
assert "def my_function():" in result
276276

277+
def test_markdown_sections_skip_code_blocks(self, tmp_path):
    """Directory-tree signatures for a markdown file ignore '#' lines inside fenced code."""
    # The fixture has one backtick fence and one tilde fence; each hides a
    # '#'-prefixed comment that must not be reported as a section header.
    markdown_file = tmp_path / "doc.md"
    markdown_file.write_text(
        "# Title\n"
        "Some intro text\n"
        "```python\n"
        "# This is a Python comment, not a header\n"
        "def hello():\n"
        " pass\n"
        "```\n"
        "## Section 2\n"
        "More text\n"
        "~~~bash\n"
        "# Bash comment\n"
        "echo hello\n"
        "~~~\n"
        "### Section 3\n"
    )

    result = list_directory_tree(
        str(tmp_path),
        max_depth=1,
        show_files=True,
        add_function_signatures=True,
    )

    # The file itself is listed.
    assert "doc.md" in result
    # Genuine headers outside any fence are reported.
    for real_header in ("# Title", "## Section 2", "### Section 3"):
        assert real_header in result
    # Comment lines inside fences are not.
    for fenced_comment in ("# This is a Python comment", "# Bash comment"):
        assert fenced_comment not in result
308+
277309

278310
class TestSearchInFiles:
279311
def test_basic_search(self, tmp_path):
@@ -629,6 +661,72 @@ def test_no_signatures_found(self, tmp_path):
629661

630662
assert "No function/class signatures found" in result
631663

664+
def test_markdown_sections_basic(self, tmp_path):
    """Headers of every level are extracted from markdown; body text is not."""
    md_file = tmp_path / "test.md"
    md_file.write_text("# Header 1\nSome text\n## Header 2\nMore text\n### Header 3\n")

    result = extract_function_signatures(str(md_file), "markdown")

    for header in ("# Header 1", "## Header 2", "### Header 3"):
        assert header in result
    # Plain paragraph text must not leak into the section list.
    assert "Some text" not in result
675+
676+
def test_markdown_sections_skip_code_blocks(self, tmp_path):
    """'#' lines inside ``` or ~~~ fences are not reported as markdown sections."""
    md_file = tmp_path / "test.md"
    md_file.write_text(
        "# Real Header\n"
        "```python\n"
        "# This is a comment in code block\n"
        "def hello():\n"
        " pass\n"
        "```\n"
        "## Another Real Header\n"
        "~~~bash\n"
        "# Bash comment\n"
        "~~~\n"
        "### Third Header\n"
    )

    result = extract_function_signatures(str(md_file), "markdown")

    # Headers that sit outside every fence are kept.
    for real_header in ("# Real Header", "## Another Real Header", "### Third Header"):
        assert real_header in result
    # Comments inside code blocks should NOT be extracted.
    for fenced_comment in ("# This is a comment in code block", "# Bash comment"):
        assert fenced_comment not in result
701+
702+
def test_markdown_no_sections(self, tmp_path):
    """A markdown file without any header yields the 'No sections found' message."""
    md_file = tmp_path / "test.md"
    md_file.write_text("Just plain text\nNo headers here\n")

    outcome = extract_function_signatures(str(md_file), "markdown")

    assert "No sections found" in outcome
710+
711+
def test_markdown_mixed_delimiters(self, tmp_path):
    """A fence opened with ``` is not closed by ~~~, so later '#' lines stay hidden."""
    md_file = tmp_path / "test.md"
    # A fence is only closed by the same delimiter that opened it, so the
    # lone ~~~ below leaves the ``` block open until the end of the file.
    md_file.write_text(
        "# Header 1\n"
        "```python\n"
        "# comment in code\n"
        "~~~\n"  # must NOT terminate the backtick fence
        "## Should be skipped (still in code block)\n"
    )

    result = extract_function_signatures(str(md_file), "markdown")

    # The block never closes, so only the header before it is extracted.
    assert "# Header 1" in result
    assert "## Should be skipped" not in result
729+
632730
def test_nonexistent_file(self):
633731
"""Test nonexistent file"""
634732
result = extract_function_signatures("/nonexistent/file.py", "python")

0 commit comments

Comments
 (0)