run-llama · Br1an67 · Mar 1, 2026 · AstraBert · Mar 2, 2026 · Br1an67
diff --git a/llama-index-core/llama_index/core/node_parser/relational/markdown_element.py b/llama-index-core/llama_index/core/node_parser/relational/markdown_element.py
@@ -170,13 +170,12 @@ def extract_elements(
                         type="code",
                         element=line.lstrip("```"),
                     )
-                elif currentElement is not None and currentElement.type == "text":
-                    currentElement.element += "\n" + line
                 else:
+                    # Start of a new code block
                     if currentElement is not None:
                         elements.append(currentElement)
                     currentElement = Element(
-                        id=f"id_{len(elements)}", type="text", element=line
+                        id=f"id_{len(elements)}", type="code", element=""
                     )
             elif currentElement is not None and currentElement.type == "code":
                 currentElement.element += "\n" + line
@@ -266,10 +265,10 @@ def extract_elements(
                         element=element.element,
                     )
             else:
-                # if the element is not a table, keep it as to text
+                # if the element is not a table, keep its original type
                 elements[idx] = Element(
                     id=f"id_{node_id}_{idx}" if node_id else f"id_{idx}",
-                    type="text",
+                    type=element.type,
                     element=element.element,
                 )
 

diff --git a/llama-index-core/tests/node_parser/test_markdown_element.py b/llama-index-core/tests/node_parser/test_markdown_element.py
@@ -2755,3 +2755,45 @@ def test_extract_html_table():
     assert test_document.text[nodes[3].start_char_idx : nodes[3].end_char_idx] == table2
     assert type(nodes[4]) is TextNode
     assert test_document.text[nodes[4].start_char_idx : nodes[4].end_char_idx] == table2
+
+
+def test_code_block_extraction() -> None:
+    """A bare fenced block between two text lines is extracted as text, code, text."""
+    node_parser = MarkdownElementNodeParser(llm=MockLLM())
+
+    document = """my cool file
+```
+my cool code block
+```
+some text after"""
+
+    result = node_parser.extract_elements(document)
+
+    # Expect exactly three elements, in document order: text, code, text.
+    assert len(result) == 3
+    assert result[0].type == "text"
+    assert "my cool file" in result[0].element
+    assert result[1].type == "code"
+    assert "my cool code block" in result[1].element  # substring check only
+    assert result[2].type == "text"
+    assert "some text after" in result[2].element
+
+
+def test_code_block_with_language() -> None:
+    """A fence opened with a language tag (```python) still yields text, code, text."""
+    node_parser = MarkdownElementNodeParser(llm=MockLLM())
+
+    document = """intro text
+```python
+def hello():
+    pass
+```
+outro text"""
+
+    result = node_parser.extract_elements(document)
+
+    assert len(result) == 3  # text, code, text
+    assert result[0].type == "text"
+    assert result[1].type == "code"
+    assert "def hello():" in result[1].element  # language tag is not asserted
+    assert result[2].type == "text"