neo4j · ali-sedaghatbaf · Mar 27, 2026 · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026
@@ -2,11 +2,21 @@
 
 ## Next
 
+### Added
+
+- MarkdownLoader (experimental): added a Markdown loader to support `.md` and `.markdown` files.
+
+### Changed
+
+- SimpleKG pipeline (experimental): the `from_pdf` parameter is deprecated in favor of `from_file` (PDF and Markdown inputs). `from_pdf` still works but emits a deprecation warning and will be removed in a future version.
+- Data loaders (experimental): the `PdfDocument` type name is deprecated in favor of `LoadedDocument`; `PdfDocument` remains available as a backward-compatible alias with a deprecation warning.
+
 ## 1.14.1
 
 ### Added
 
 - `NodeType` and `RelationshipType` now reject labels and types that start or end with double underscores (`__`), e.g. `__Person__`. This convention is reserved for internal Neo4j GraphRAG labels. A `ValidationError` is raised on construction.
+- SimpleKG pipeline (experimental): Markdown inputs (`.md` / `.markdown`) are supported alongside PDF via the default extension-based file loader when building from a file path.
 
 ### Changed
 

@@ -138,7 +138,7 @@ kg_builder = SimpleKGPipeline(
         "patterns": patterns,
     },
     on_error="IGNORE",
-    from_pdf=False,
+    from_file=False,
 )
 
 # Run the pipeline on a piece of text

@@ -18,13 +18,19 @@ Component
 DataLoader
 ==========
 
-.. autoclass:: neo4j_graphrag.experimental.components.pdf_loader.DataLoader
+.. autoclass:: neo4j_graphrag.experimental.components.data_loader.DataLoader
     :members: run, get_document_metadata
 
 PdfLoader
 =========
 
-.. autoclass:: neo4j_graphrag.experimental.components.pdf_loader.PdfLoader
+.. autoclass:: neo4j_graphrag.experimental.components.data_loader.PdfLoader
+    :members: run, load_file
+
+MarkdownLoader
+==============
+
+.. autoclass:: neo4j_graphrag.experimental.components.data_loader.MarkdownLoader
     :members: run, load_file
 
 TextSplitter

@@ -44,6 +44,11 @@ DocumentInfo
 
 .. autoclass:: neo4j_graphrag.experimental.components.types.DocumentInfo
 
+LoadedDocument
+==============
+
+.. autoclass:: neo4j_graphrag.experimental.components.types.LoadedDocument
+
 
 TextChunk
 =========

@@ -54,10 +54,10 @@ is utilizing the `SimpleKGPipeline` interface:
         llm=llm, # an LLMInterface for Entity and Relation extraction
         driver=neo4j_driver,  # a neo4j driver to write results to graph
         embedder=embedder,  # an Embedder for chunks
-        from_pdf=True,   # set to False if parsing an already extracted text
+        from_file=True,   # set to False if parsing already-extracted text
     )
     await kg_builder.run_async(file_path=str(file_path))
-    # await kg_builder.run_async(text="my text")  # if using from_pdf=False
+    # await kg_builder.run_async(text="my text")  # if using from_file=False
 
 
 See:
@@ -216,9 +216,12 @@ instances of specific components to the `SimpleKGPipeline`. The components that
 customized at the moment are:
 
 - `text_splitter`: must be an instance of :ref:`TextSplitter`
-- `pdf_loader`: must be an instance of :ref:`PdfLoader`
+- `file_loader`: must be an instance of :ref:`PdfLoader` or :ref:`MarkdownLoader`
 - `kg_writer`: must be an instance of :ref:`KGWriter`
 
+The legacy names ``from_pdf`` and ``pdf_loader`` (in Python, YAML, or JSON) are still accepted
+with a deprecation warning; use ``from_file`` and ``file_loader`` instead.
+
 For instance, the following code can be used to customize the chunk size and
 chunk overlap in the text splitter component:
 
@@ -450,7 +453,7 @@ within the configuration file.
 .. code:: json
 
     {
-        "from_pdf": false,
+        "from_file": false,
         "perform_entity_resolution": true,
         "neo4j_database": "myDb",
         "on_error": "IGNORE",
@@ -502,7 +505,7 @@ or in YAML:
 
 .. code:: yaml
 
-    from_pdf: false
+    from_file: false
     perform_entity_resolution: true
     neo4j_database: myDb
     on_error: IGNORE
@@ -578,7 +581,7 @@ Each of these components can be run individually:
 .. code:: python
 
     import asyncio
-    from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
+    from neo4j_graphrag.experimental.components.data_loader import PdfLoader
     my_component = PdfLoader()
     asyncio.run(my_component.run("my_file.pdf"))
 
@@ -588,7 +591,7 @@ They can also be used within a pipeline:
 .. code:: python
 
     from neo4j_graphrag.experimental.pipeline import Pipeline
-    from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
+    from neo4j_graphrag.experimental.components.data_loader import PdfLoader
     pipeline = Pipeline()
     my_component = PdfLoader()
     pipeline.add_component(my_component, "component_name")
@@ -604,7 +607,7 @@ This package currently supports text extraction from PDFs:
 .. code:: python
 
     from pathlib import Path
-    from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
+    from neo4j_graphrag.experimental.components.data_loader import PdfLoader
 
     loader = PdfLoader()
     await loader.run(filepath=Path("my_file.pdf"))
@@ -614,12 +617,13 @@ To implement your own loader, use the `DataLoader` interface:
 .. code:: python
 
     from pathlib import Path
-    from neo4j_graphrag.experimental.components.pdf_loader import DataLoader, PdfDocument
+    from neo4j_graphrag.experimental.components.data_loader import DataLoader
+    from neo4j_graphrag.experimental.components.types import DocumentInfo, LoadedDocument
 
     class MyDataLoader(DataLoader):
-        async def run(self, filepath: Path, metadata: Optional[Dict[str, str]] = None) -> PdfDocument:
+        async def run(self, filepath: Path, metadata: Optional[Dict[str, str]] = None) -> LoadedDocument:
             # process file in `filepath`
-            return PdfDocument(
+            return LoadedDocument(
                 text="text",
                 document_info=DocumentInfo(
                     path=str(filepath),

@@ -54,7 +54,7 @@ async def run_kg_pipeline_with_auto_schema() -> None:
             llm=llm,
             driver=driver,
             embedder=embedder,
-            from_pdf=True,
+            from_file=True,
         )
 
         print(f"Processing PDF file: {PDF_FILE}")

@@ -73,7 +73,7 @@ async def run_kg_pipeline_with_auto_schema() -> None:
             llm=llm,
             driver=driver,
             embedder=embedder,
-            from_pdf=False,  # Using raw text input, not PDF
+            from_file=False,  # Using raw text input, not a file
         )
 
         # Run the pipeline on the text

@@ -36,7 +36,7 @@
             }
         }
     },
-    "from_pdf": false,
+    "from_file": false,
     "schema": {
         "node_types": [
             "Person",

@@ -24,7 +24,7 @@ embedder_config:
     api_key:
       resolver_: ENV
       var_: OPENAI_API_KEY
-from_pdf: false
+from_file: false
 schema:
   node_types:
     - label: Person

@@ -36,7 +36,7 @@
             }
         }
     },
-    "from_pdf": true,
+    "from_file": true,
     "schema": {
         "node_types": [
             "Person",
@@ -105,8 +105,8 @@
             "chunk_overlap": 10
         }
     },
-    "pdf_loader": {
-        "class_": "pdf_loader.PdfLoader",
+    "file_loader": {
+        "class_": "data_loader.PdfLoader",
         "run_params_": {
             "fs": "http"
         }

@@ -79,7 +79,7 @@ async def define_and_run_pipeline(
             "relationship_types": RELATIONSHIP_TYPES,
             "patterns": PATTERNS,
         },
-        from_pdf=False,
+        from_file=False,
         neo4j_database=DATABASE,
     )
     return await kg_builder.run_async(

@@ -1,20 +1,24 @@
 """Create a custom data loader to transform content into text."""
 
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, Optional, Union
 
-from neo4j_graphrag.experimental.components.pdf_loader import DataLoader
-from neo4j_graphrag.experimental.components.types import DocumentInfo, PdfDocument
+from fsspec import AbstractFileSystem
+
+from neo4j_graphrag.experimental.components.data_loader import DataLoader
+from neo4j_graphrag.experimental.components.types import DocumentInfo, LoadedDocument
 
 
 class MyLoader(DataLoader):
     async def run(
         self,
-        filepath: Path,
+        filepath: Union[str, Path],
         metadata: Optional[Dict[str, str]] = None,
-    ) -> PdfDocument:
-        # Implement logic here
-        return PdfDocument(
+        fs: Optional[Union[AbstractFileSystem, str]] = None,
+    ) -> LoadedDocument:
+        # Implement logic here; use ``fs`` when reading from non-local storage.
+        _ = fs
+        return LoadedDocument(
             text="<extracted text>",
             document_info=DocumentInfo(
                 path=str(filepath),

@@ -3,7 +3,7 @@
 import asyncio
 from pathlib import Path
 
-from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
+from neo4j_graphrag.experimental.components.data_loader import PdfLoader
 
 root_dir = Path(__file__).parents[4]
 file_path = root_dir / "data" / "Harry Potter and the Chamber of Secrets Summary.pdf"

@@ -2,7 +2,7 @@
 
 import asyncio
 
-from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
+from neo4j_graphrag.experimental.components.data_loader import PdfLoader
 
 url = "https://raw.githubusercontent.com/neo4j/neo4j-graphrag-python/c166afc4d5abc56a5686f3da46a97ed7c07da19d/examples/data/Harry%20Potter%20and%20the%20Chamber%20of%20Secrets%20Summary.pdf"
 

@@ -22,7 +22,7 @@
     OnError,
 )
 from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
-from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
+from neo4j_graphrag.experimental.components.data_loader import PdfLoader
 from neo4j_graphrag.experimental.components.schema import (
     SchemaBuilder,
     NodeType,

@@ -22,7 +22,7 @@
     OnError,
 )
 from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
-from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
+from neo4j_graphrag.experimental.components.data_loader import PdfLoader
 from neo4j_graphrag.experimental.components.resolver import (
     SinglePropertyExactMatchResolver,
 )

@@ -29,7 +29,7 @@
     OnError,
 )
 from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
-from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
+from neo4j_graphrag.experimental.components.data_loader import PdfLoader
 from neo4j_graphrag.experimental.components.schema import (
     SchemaBuilder,
     NodeType,

@@ -128,6 +128,18 @@ class PdfLoaderError(Neo4jGraphRagError):
     pass
 
 
+class MarkdownLoadError(Neo4jGraphRagError):
+    """Custom exception for errors in Markdown loader."""
+
+    pass
+
+
+class UnsupportedDocumentFormatError(Neo4jGraphRagError):
+    """Raised when a file extension is not supported by the document file loader."""
+
+    pass
+
+
 class PromptMissingPlaceholderError(Neo4jGraphRagError):
     """Exception raised when a prompt is missing an expected placeholder."""