Skip to content

Commit 19f7902

Browse files
committed
gen: #file:pdf_loaders.py のテストコードを #file:test_pdf_loaders.py に書いて
1 parent 4c70616 commit 19f7902

File tree

1 file changed

+144
-0
lines changed

1 file changed

+144
-0
lines changed
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import os
2+
from unittest.mock import Mock, patch
3+
4+
from langchain_core.documents import Document
5+
6+
from template_langgraph.utilities.pdf_loaders import (
7+
PdfLoaderWrapper,
8+
Settings,
9+
get_pdf_loader_settings,
10+
)
11+
12+
13+
class TestSettings:
    """Tests for how ``Settings`` resolves ``pdf_loader_data_dir_path``."""

    def test_default_settings(self):
        """Test default settings values."""
        # Remove any ambient PDF_LOADER_DATA_DIR_PATH so this test observes the
        # built-in default instead of whatever the developer's shell or the CI
        # runner exports. patch.dict restores os.environ when the block exits.
        # NOTE(review): if Settings also reads a dotenv file, that source is
        # not neutralized here -- confirm against pdf_loaders.py.
        with patch.dict(os.environ):
            os.environ.pop("PDF_LOADER_DATA_DIR_PATH", None)
            settings = Settings()
        assert settings.pdf_loader_data_dir_path == "./data"

    def test_custom_settings(self):
        """Test that an explicitly passed value is stored on the instance."""
        settings = Settings(pdf_loader_data_dir_path="/custom/path")
        assert settings.pdf_loader_data_dir_path == "/custom/path"

    @patch.dict(os.environ, {"PDF_LOADER_DATA_DIR_PATH": "/env/path"})
    def test_env_settings(self):
        """Test settings from environment variables."""
        # The decorator injects the variable for the duration of this test
        # only and restores the previous environment afterwards.
        settings = Settings()
        assert settings.pdf_loader_data_dir_path == "/env/path"
29+
30+
31+
class TestGetPdfLoaderSettings:
    """Checks on the ``get_pdf_loader_settings`` accessor."""

    def test_get_pdf_loader_settings_returns_settings(self):
        """The accessor should hand back a ``Settings`` object."""
        assert isinstance(get_pdf_loader_settings(), Settings)

    def test_get_pdf_loader_settings_is_cached(self):
        """Two consecutive calls should yield the very same object."""
        first_call = get_pdf_loader_settings()
        second_call = get_pdf_loader_settings()
        # Identity (not equality) is what demonstrates reuse of one instance.
        assert first_call is second_call
42+
43+
44+
class TestPdfLoaderWrapper:
    """Tests for ``PdfLoaderWrapper`` construction and ``load_pdf_docs``.

    ``glob`` and ``PyPDFLoader`` are patched where ``pdf_loaders`` looks them
    up, so no real files are touched by the ``load_pdf_docs`` tests.
    """

    def test_init_with_default_settings(self):
        """A wrapper built without arguments exposes default ``Settings``."""
        default_wrapper = PdfLoaderWrapper()
        wrapper_settings = default_wrapper.settings
        assert isinstance(wrapper_settings, Settings)
        assert wrapper_settings.pdf_loader_data_dir_path == "./data"

    def test_init_with_custom_settings(self):
        """A wrapper keeps the ``Settings`` object it was given."""
        wrapper = PdfLoaderWrapper(
            settings=Settings(pdf_loader_data_dir_path="/custom/path"),
        )
        assert wrapper.settings.pdf_loader_data_dir_path == "/custom/path"

    @patch("template_langgraph.utilities.pdf_loaders.glob")
    @patch("template_langgraph.utilities.pdf_loaders.PyPDFLoader")
    def test_load_pdf_docs_no_files(self, mock_pdf_loader, mock_glob):
        """``load_pdf_docs`` returns an empty list when glob finds nothing."""
        expected_pattern = os.path.join("./data", "**", "*.pdf")
        mock_glob.return_value = []

        result = PdfLoaderWrapper().load_pdf_docs()

        assert result == []
        # The search must be a single recursive glob under the data directory.
        mock_glob.assert_called_once_with(expected_pattern, recursive=True)

    @patch("template_langgraph.utilities.pdf_loaders.glob")
    @patch("template_langgraph.utilities.pdf_loaders.PyPDFLoader")
    def test_load_pdf_docs_with_files(self, mock_pdf_loader, mock_glob):
        """``load_pdf_docs`` concatenates the chunks of every PDF found."""
        # Two fake PDF paths: the first yields two chunks, the second one.
        pdf_paths = ["./data/file1.pdf", "./data/file2.pdf"]
        mock_glob.return_value = pdf_paths

        expected_docs = [
            Document(page_content="Content 1", metadata={"source": "file1.pdf"}),
            Document(page_content="Content 2", metadata={"source": "file1.pdf"}),
            Document(page_content="Content 3", metadata={"source": "file2.pdf"}),
        ]

        first_loader = Mock(**{"load_and_split.return_value": expected_docs[:2]})
        second_loader = Mock(**{"load_and_split.return_value": expected_docs[2:]})
        # Successive PyPDFLoader(...) constructions return these loaders in order.
        mock_pdf_loader.side_effect = [first_loader, second_loader]

        docs = PdfLoaderWrapper().load_pdf_docs()

        # The chunks come back flattened, in loader order.
        assert len(docs) == 3
        for index, expected in enumerate(expected_docs):
            assert docs[index] == expected

        # One loader was constructed per discovered path.
        assert mock_pdf_loader.call_count == 2
        for path in pdf_paths:
            mock_pdf_loader.assert_any_call(path)

        # Each loader was asked to load-and-split exactly once.
        for loader in (first_loader, second_loader):
            loader.load_and_split.assert_called_once()

    @patch("template_langgraph.utilities.pdf_loaders.glob")
    @patch("template_langgraph.utilities.pdf_loaders.PyPDFLoader")
    def test_load_pdf_docs_with_custom_data_dir(self, mock_pdf_loader, mock_glob):
        """The glob pattern is rooted at the configured data directory."""
        mock_glob.return_value = []
        wrapper = PdfLoaderWrapper(
            settings=Settings(pdf_loader_data_dir_path="/custom/data"),
        )

        wrapper.load_pdf_docs()

        expected_pattern = os.path.join("/custom/data", "**", "*.pdf")
        mock_glob.assert_called_once_with(expected_pattern, recursive=True)

    @patch("template_langgraph.utilities.pdf_loaders.glob")
    @patch("template_langgraph.utilities.pdf_loaders.PyPDFLoader")
    def test_load_pdf_docs_text_splitter_configuration(self, mock_pdf_loader, mock_glob):
        """``load_and_split`` receives one positional splitter-like argument."""
        mock_glob.return_value = ["./data/test.pdf"]
        fake_loader = Mock(**{"load_and_split.return_value": []})
        mock_pdf_loader.return_value = fake_loader

        PdfLoaderWrapper().load_pdf_docs()

        fake_loader.load_and_split.assert_called_once()
        # call_args unpacks into (positional args, keyword args).
        positional_args, _keyword_args = fake_loader.load_and_split.call_args
        assert len(positional_args) == 1
        text_splitter = positional_args[0]

        # Only the presence of the splitter's size/overlap attributes is
        # checked here, not their values.
        for attribute in ("_chunk_size", "_chunk_overlap"):
            assert hasattr(text_splitter, attribute)

0 commit comments

Comments
 (0)