2020from unittest .mock import patch
2121
2222import pytest
23- from fsspec import AbstractFileSystem
24- from fsspec .implementations .local import LocalFileSystem
2523from neo4j_graphrag .exceptions import MarkdownLoadError , PdfLoaderError
2624from neo4j_graphrag .experimental .components .data_loader import (
2725 MarkdownLoader ,
@@ -49,31 +47,25 @@ def dummy_md_path() -> str:
4947
5048def test_pdf_loading (pdf_loader : PdfLoader , dummy_pdf_path : str ) -> None :
5149 expected_content = "Lorem ipsum dolor sit amet."
52- actual_content = pdf_loader .load_file (dummy_pdf_path , fs = LocalFileSystem () )
50+ actual_content = pdf_loader .load_file (dummy_pdf_path )
5351 assert actual_content == expected_content
5452
5553
5654def test_pdf_processing_error (pdf_loader : PdfLoader , dummy_pdf_path : str ) -> None :
57- with patch (
58- "fsspec.implementations.local.LocalFileSystem.open" ,
59- side_effect = Exception ("Failed to open" ),
60- ):
55+ with patch ("builtins.open" , side_effect = Exception ("Failed to open" )):
6156 with pytest .raises (PdfLoaderError ):
62- pdf_loader .load_file (dummy_pdf_path , fs = LocalFileSystem () )
57+ pdf_loader .load_file (dummy_pdf_path )
6358
6459
6560def test_markdown_processing_error (dummy_md_path : str ) -> None :
66- with patch (
67- "fsspec.implementations.local.LocalFileSystem.open" ,
68- side_effect = Exception ("Failed to open" ),
69- ):
61+ with patch ("builtins.open" , side_effect = Exception ("Failed to open" )):
7062 with pytest .raises (MarkdownLoadError ):
71- MarkdownLoader .load_file (dummy_md_path , fs = LocalFileSystem () )
63+ MarkdownLoader .load_file (dummy_md_path )
7264
7365
7466def test_markdown_loading () -> None :
7567 md_path = str (BASE_DIR / "sample_data/hello.md" )
76- text = MarkdownLoader .load_file (md_path , fs = LocalFileSystem () )
68+ text = MarkdownLoader .load_file (md_path )
7769 assert "# Hello" in text
7870 assert "Markdown **content**" in text
7971
@@ -89,7 +81,7 @@ async def test_markdown_loader_run() -> None:
8981
9082@pytest .mark .asyncio
9183async def test_pdf_loader_run () -> None :
92- """``PdfLoader.run`` wraps ``load_file`` with :class:`DocumentInfo` (default ``fs``) ."""
84+ """``PdfLoader.run`` wraps ``load_file`` with :class:`DocumentInfo`."""
9385 pdf_path = BASE_DIR / "sample_data/lorem_ipsum.pdf"
9486 loader = PdfLoader ()
9587 doc = await loader .run (filepath = pdf_path )
@@ -98,25 +90,6 @@ async def test_pdf_loader_run() -> None:
9890 assert doc .text == "Lorem ipsum dolor sit amet."
9991
10092
101- @pytest .mark .asyncio
102- async def test_pdf_loader_run_fs_string_resolves_with_fsspec (
103- dummy_pdf_path : str ,
104- ) -> None :
105- """``fs`` may be a protocol name passed to ``fsspec.filesystem`` (e.g. ``\" file\" ``)."""
106- loader = PdfLoader ()
107- doc = await loader .run (filepath = dummy_pdf_path , fs = "file" )
108- assert "Lorem ipsum" in doc .text
109-
110-
111- @pytest .mark .asyncio
112- async def test_markdown_loader_run_fs_string () -> None :
113- md_path = str (BASE_DIR / "sample_data/hello.md" )
114- loader = MarkdownLoader ()
115- doc = await loader .run (filepath = md_path , fs = "file" )
116- assert doc .document_info .document_type == DocumentType .MARKDOWN
117- assert "# Hello" in doc .text
118-
119-
12093@pytest .mark .asyncio
12194async def test_run_passes_metadata_to_document_info (dummy_pdf_path : str ) -> None :
12295 loader = PdfLoader ()
@@ -132,9 +105,8 @@ async def run(
132105 self ,
133106 filepath : Union [str , Path ],
134107 metadata : Optional [dict [str , str ]] = None ,
135- fs : Optional [Union [AbstractFileSystem , str ]] = None ,
136108 ) -> LoadedDocument :
137- return await super ().run (filepath = filepath , metadata = metadata , fs = fs )
109+ return await super ().run (filepath = filepath , metadata = metadata )
138110
139111 def get_document_metadata (
140112 self , text : str , metadata : dict [str , str ] | None = None
@@ -158,18 +130,6 @@ async def test_get_document_metadata_override_merges_into_document_info(
158130 assert doc .document_info .metadata ["text_length" ] == str (len (doc .text ))
159131
160132
161- def test_pdf_loader_non_local_filesystem_branch_uses_bytesio (
162- dummy_pdf_path : str ,
163- ) -> None :
164- """Non-\" default\" local FS (``auto_mkdir=True``) reads into BytesIO for pypdf."""
165- from neo4j_graphrag .experimental .components .data_loader import is_default_fs
166-
167- fs = LocalFileSystem (auto_mkdir = True )
168- assert is_default_fs (fs ) is False
169- text = PdfLoader .load_file (dummy_pdf_path , fs = fs )
170- assert text == "Lorem ipsum dolor sit amet."
171-
172-
173133def test_pdf_loader_backward_compat_reexport_module () -> None :
174134 """``pdf_loader`` submodule re-exports the same classes as ``data_loader``."""
175135 from neo4j_graphrag .experimental .components .data_loader import (
0 commit comments