Skip to content

Commit 065346a

Browse files
devin-ai-integration[bot] and João committed
fix: pass kwargs to loader in CrewAIRagAdapter to enable private repo access
Fixes #4088.

The GithubSearchTool was unable to access private repositories because the CrewAIRagAdapter.add() method was not passing kwargs (including metadata with gh_token) to the loader.load() call. This fix ensures that kwargs are properly forwarded to the loader, allowing the GithubLoader to receive the gh_token for authentication.

Added tests:
- test_github_loader.py: Unit tests for GithubLoader
- test_crewai_rag_adapter.py: Tests for kwargs passthrough in adapter

Co-Authored-By: João <joao@crewai.com>
1 parent 75ff7dc commit 065346a

File tree

3 files changed

+358
-1
lines changed

3 files changed

+358
-1
lines changed

lib/crewai-tools/src/crewai_tools/adapters/crewai_rag_adapter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ def add(self, *args: ContentItem, **kwargs: Unpack[AddDocumentParams]) -> None:
319319
loader = data_type.get_loader()
320320
chunker = data_type.get_chunker()
321321

322-
loader_result: LoaderResult = loader.load(source_content)
322+
loader_result: LoaderResult = loader.load(source_content, **kwargs)
323323

324324
chunks = chunker.chunk(loader_result.content)
325325

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
import pytest
4+
5+
from crewai_tools.rag.base_loader import LoaderResult
6+
from crewai_tools.rag.data_types import DataType
7+
8+
9+
class TestCrewAIRagAdapterKwargsPassthrough:
    """Verify that CrewAIRagAdapter.add() forwards its keyword arguments to loader.load().

    The metadata dict is the payload of interest: it may carry a ``gh_token``
    that the loader needs.
    """

    @staticmethod
    def _stub_loader(source: str) -> MagicMock:
        """Return a loader double whose load() yields a fixed LoaderResult for *source*."""
        stub = MagicMock()
        stub.load.return_value = LoaderResult(
            content="Test content",
            metadata={"source": source},
            doc_id="test_doc_id",
        )
        return stub

    @staticmethod
    def _stub_chunker() -> MagicMock:
        """Return a chunker double that always produces a single chunk."""
        stub = MagicMock()
        stub.chunk.return_value = ["Test content"]
        return stub

    @patch("crewai_tools.adapters.crewai_rag_adapter.get_rag_client")
    def test_add_passes_kwargs_to_loader(
        self, mock_get_rag_client: MagicMock
    ) -> None:
        """Metadata given to add() must arrive at loader.load() as a keyword argument.

        Regression test for GitHub issue #4088: GithubSearchTool could not read
        private repositories because the gh_token stored in metadata never
        reached the GithubLoader.
        """
        from crewai_tools.adapters.crewai_rag_adapter import CrewAIRagAdapter

        rag_client = MagicMock()
        rag_client.search.return_value = []
        mock_get_rag_client.return_value = rag_client
        adapter = CrewAIRagAdapter(collection_name="test_collection")

        repo_url = "https://github.com/owner/repo"
        loader = self._stub_loader(repo_url)
        chunker = self._stub_chunker()
        test_metadata = {"gh_token": "ghp_test_token", "content_types": ["repo"]}

        # Swap the GITHUB data type's loader and chunker for the doubles above.
        loader_patch = patch.object(
            DataType.GITHUB, "get_loader", return_value=loader
        )
        chunker_patch = patch.object(
            DataType.GITHUB, "get_chunker", return_value=chunker
        )
        with loader_patch, chunker_patch:
            adapter.add(repo_url, data_type=DataType.GITHUB, metadata=test_metadata)

        loader.load.assert_called_once()
        forwarded = loader.load.call_args.kwargs
        assert "metadata" in forwarded
        assert forwarded["metadata"]["gh_token"] == "ghp_test_token"
        assert forwarded["metadata"]["content_types"] == ["repo"]

    @patch("crewai_tools.adapters.crewai_rag_adapter.get_rag_client")
    def test_add_passes_all_kwargs_to_loader(
        self, mock_get_rag_client: MagicMock
    ) -> None:
        """Arbitrary metadata keys passed to add() also reach loader.load()."""
        from crewai_tools.adapters.crewai_rag_adapter import CrewAIRagAdapter

        rag_client = MagicMock()
        rag_client.search.return_value = []
        mock_get_rag_client.return_value = rag_client
        adapter = CrewAIRagAdapter(collection_name="test_collection")

        loader = self._stub_loader("test")
        chunker = self._stub_chunker()

        # Swap the TEXT data type's loader and chunker for the doubles above.
        loader_patch = patch.object(DataType.TEXT, "get_loader", return_value=loader)
        chunker_patch = patch.object(
            DataType.TEXT, "get_chunker", return_value=chunker
        )
        with loader_patch, chunker_patch:
            adapter.add(
                "Some text content",
                data_type=DataType.TEXT,
                metadata={"custom_key": "custom_value"},
            )

        loader.load.assert_called_once()
        forwarded = loader.load.call_args.kwargs
        assert "metadata" in forwarded
        assert forwarded["metadata"]["custom_key"] == "custom_value"
92+
93+
class TestGithubSearchToolPrivateRepoAccess:
    """Regression tests for GitHub issue #4088 (private repository access).

    Despite the class name, these tests do not instantiate GithubSearchTool.
    They call GithubLoader.load() directly, with the ``Github`` client class
    patched, and check how the client is constructed from the metadata.
    """

    def setup_mock_repo(
        self,
        full_name: str = "owner/repo",
        description: str = "Test repo",
    ) -> MagicMock:
        """Build a MagicMock standing in for a repository object.

        Args:
            full_name: Value exposed as ``repo.full_name``.
            description: Value exposed as ``repo.description``.

        Returns:
            A mock with fixed language/star/fork values, a README whose decoded
            content is ``b"# README"``, and empty contents, pulls and issues.
        """
        mock_repo = MagicMock()
        mock_repo.full_name = full_name
        mock_repo.description = description
        mock_repo.language = "Python"
        mock_repo.stargazers_count = 10
        mock_repo.forks_count = 2

        readme = MagicMock()
        readme.decoded_content = b"# README"
        mock_repo.get_readme.return_value = readme
        mock_repo.get_contents.return_value = []
        mock_repo.get_pulls.return_value = []
        mock_repo.get_issues.return_value = []

        return mock_repo

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_github_search_tool_passes_token_to_loader(
        self, mock_github_class: MagicMock
    ) -> None:
        """A gh_token in metadata is used to construct the Github client.

        This is the main regression test for issue #4088. It exercises
        GithubLoader directly rather than going through GithubSearchTool.
        """
        from crewai_tools.rag.loaders.github_loader import GithubLoader
        from crewai_tools.rag.source_content import SourceContent

        mock_github = MagicMock()
        mock_github_class.return_value = mock_github
        mock_github.get_repo.return_value = self.setup_mock_repo(
            full_name="owner/private-repo"
        )

        loader = GithubLoader()
        loader.load(
            SourceContent("https://github.com/owner/private-repo"),
            metadata={"gh_token": "ghp_test_private_token", "content_types": ["repo"]},
        )

        # assert_called_once_with (not assert_called_with) so that an extra,
        # token-less client construction would fail the test instead of being
        # masked by the last call.
        mock_github_class.assert_called_once_with("ghp_test_private_token")

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_github_search_tool_without_token_uses_public_access(
        self, mock_github_class: MagicMock
    ) -> None:
        """Without a gh_token the Github client is built with no credentials.

        Exercises GithubLoader directly rather than going through
        GithubSearchTool.
        """
        from crewai_tools.rag.loaders.github_loader import GithubLoader
        from crewai_tools.rag.source_content import SourceContent

        mock_github = MagicMock()
        mock_github_class.return_value = mock_github
        mock_github.get_repo.return_value = self.setup_mock_repo(
            full_name="owner/public-repo"
        )

        loader = GithubLoader()
        loader.load(
            SourceContent("https://github.com/owner/public-repo"),
            metadata={"content_types": ["repo"]},
        )

        mock_github_class.assert_called_once()
        call_args = mock_github_class.call_args
        # Accept either Github(None) or Github(): both mean "no token supplied".
        assert call_args == ((None,),) or call_args == ((),)
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
import pytest
4+
from github import GithubException
5+
6+
from crewai_tools.rag.base_loader import LoaderResult
7+
from crewai_tools.rag.loaders.github_loader import GithubLoader
8+
from crewai_tools.rag.source_content import SourceContent
9+
10+
11+
class TestGithubLoader:
    """Unit tests for GithubLoader with the ``Github`` client class patched out."""

    def setup_mock_repo(
        self,
        full_name: str = "owner/repo",
        description: str = "Test repo",
        language: str = "Python",
        stars: int = 100,
        forks: int = 10,
    ) -> MagicMock:
        """Build a MagicMock standing in for a repository object.

        Args:
            full_name: Value exposed as ``repo.full_name``.
            description: Value exposed as ``repo.description``.
            language: Value exposed as ``repo.language``.
            stars: Value exposed as ``repo.stargazers_count``.
            forks: Value exposed as ``repo.forks_count``.

        Returns:
            A mock with a README, a single ``README.md`` file entry from
            ``get_contents()``, and empty pull-request and issue lists.
        """
        readme = MagicMock(decoded_content=b"# Test README\n\nThis is a test.")
        readme_entry = MagicMock(path="README.md", type="file")

        repo = MagicMock(
            full_name=full_name,
            description=description,
            language=language,
            stargazers_count=stars,
            forks_count=forks,
        )
        repo.get_readme.return_value = readme
        repo.get_contents.return_value = [readme_entry]
        repo.get_pulls.return_value = []
        repo.get_issues.return_value = []
        return repo

    @staticmethod
    def _client_returning(
        mock_github_class: MagicMock, repo: MagicMock
    ) -> MagicMock:
        """Make the patched Github class yield a client whose get_repo() returns *repo*."""
        client = MagicMock()
        client.get_repo.return_value = repo
        mock_github_class.return_value = client
        return client

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_load_public_repo_without_token(self, mock_github_class: MagicMock) -> None:
        """Loading without a gh_token builds the client with no credentials."""
        self._client_returning(mock_github_class, self.setup_mock_repo())

        loaded = GithubLoader().load(
            SourceContent("https://github.com/owner/repo"),
            metadata={"content_types": ["repo", "code"]},
        )

        assert isinstance(loaded, LoaderResult)
        assert "owner/repo" in loaded.content
        mock_github_class.assert_called_once()
        ctor_call = mock_github_class.call_args
        # Either Github(None) or Github() counts as "no token supplied".
        assert ctor_call == ((None,),) or ctor_call == ((),)

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_load_with_token_passes_token_to_github(
        self, mock_github_class: MagicMock
    ) -> None:
        """A gh_token in metadata is passed positionally to the Github constructor."""
        self._client_returning(mock_github_class, self.setup_mock_repo())

        loaded = GithubLoader().load(
            SourceContent("https://github.com/owner/private-repo"),
            metadata={"gh_token": "ghp_test_token_123", "content_types": ["repo"]},
        )

        assert isinstance(loaded, LoaderResult)
        mock_github_class.assert_called_once_with("ghp_test_token_123")

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_private_repo_access_fails_without_token(
        self, mock_github_class: MagicMock
    ) -> None:
        """A GithubException from get_repo() surfaces as a ValueError."""
        failing_client = MagicMock()
        failing_client.get_repo.side_effect = GithubException(
            404, {"message": "Not Found"}, None
        )
        mock_github_class.return_value = failing_client

        loader = GithubLoader()
        with pytest.raises(ValueError, match="Unable to access repository"):
            loader.load(
                SourceContent("https://github.com/owner/private-repo"),
                metadata={"content_types": ["repo"]},
            )

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_private_repo_access_succeeds_with_token(
        self, mock_github_class: MagicMock
    ) -> None:
        """With a token, the repository loads and its name appears in the content."""
        private_repo = self.setup_mock_repo(full_name="owner/private-repo")
        self._client_returning(mock_github_class, private_repo)

        loaded = GithubLoader().load(
            SourceContent("https://github.com/owner/private-repo"),
            metadata={"gh_token": "ghp_valid_token", "content_types": ["repo"]},
        )

        assert isinstance(loaded, LoaderResult)
        assert "owner/private-repo" in loaded.content
        mock_github_class.assert_called_once_with("ghp_valid_token")

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_load_with_all_content_types(
        self, mock_github_class: MagicMock
    ) -> None:
        """Requesting repo, code, pr and issue content yields all four in the output."""
        repo = self.setup_mock_repo()
        repo.get_pulls.return_value = [
            MagicMock(number=1, title="Test PR", body="PR description")
        ]
        repo.get_issues.return_value = [
            MagicMock(
                number=1,
                title="Test Issue",
                body="Issue description",
                pull_request=None,
            )
        ]
        self._client_returning(mock_github_class, repo)

        loaded = GithubLoader().load(
            SourceContent("https://github.com/owner/repo"),
            metadata={"content_types": ["repo", "code", "pr", "issue"]},
        )

        for expected in ("Repository: owner/repo", "README", "Test PR", "Test Issue"):
            assert expected in loaded.content

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_invalid_github_url(self, mock_github_class: MagicMock) -> None:
        """A non-GitHub host is rejected with a ValueError."""
        loader = GithubLoader()
        with pytest.raises(ValueError, match="Invalid GitHub URL"):
            loader.load(SourceContent("https://gitlab.com/owner/repo"))

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_invalid_repo_url_format(self, mock_github_class: MagicMock) -> None:
        """A GitHub URL without a repository segment is rejected with a ValueError."""
        loader = GithubLoader()
        with pytest.raises(ValueError, match="Invalid GitHub repository URL"):
            loader.load(SourceContent("https://github.com/owner"))

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_default_content_types(self, mock_github_class: MagicMock) -> None:
        """With empty metadata the output still has the repository header and README."""
        self._client_returning(mock_github_class, self.setup_mock_repo())

        loaded = GithubLoader().load(
            SourceContent("https://github.com/owner/repo"),
            metadata={},
        )

        assert "Repository: owner/repo" in loaded.content
        assert "README" in loaded.content

    @patch("crewai_tools.rag.loaders.github_loader.Github")
    def test_metadata_in_result(self, mock_github_class: MagicMock) -> None:
        """The result metadata records the source URL, repo name and content types."""
        self._client_returning(mock_github_class, self.setup_mock_repo())

        loaded = GithubLoader().load(
            SourceContent("https://github.com/owner/repo"),
            metadata={"content_types": ["repo"]},
        )

        result_meta = loaded.metadata
        assert result_meta["source"] == "https://github.com/owner/repo"
        assert result_meta["repo"] == "owner/repo"
        assert result_meta["content_types"] == ["repo"]

0 commit comments

Comments
 (0)