Skip to content

Commit b04b06d

Browse files
Merge pull request #1 from jacksonthall22/jacksonthall22/ui-submodules
Web: add recurse submodules option
2 parents 4e259a0 + f2356f3 commit b04b06d

File tree

10 files changed

+157
-1
lines changed

10 files changed

+157
-1
lines changed

src/gitingest/clone.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,59 @@
2727
# Initialize logger for this module
2828
logger = get_logger(__name__)
2929

30+
_ASKPASS_SCRIPT_NAME = "gitingest-askpass.sh"
31+
32+
33+
def _write_git_askpass_script(repo_path: Path) -> Path:
34+
"""Write a small askpass helper into ``.git`` that reads the token from env.
35+
36+
The script never embeds secrets; it prints the username and reads the token from
37+
``GITINGEST_GIT_PASSWORD``.
38+
"""
39+
git_dir = repo_path / ".git"
40+
git_dir.mkdir(parents=True, exist_ok=True)
41+
42+
askpass_path = git_dir / _ASKPASS_SCRIPT_NAME
43+
askpass_path.write_text(
44+
"#!/bin/sh\n"
45+
'case "$1" in\n'
46+
' Username*) echo "x-access-token" ;;\n'
47+
' Password*) echo "${GITINGEST_GIT_PASSWORD:-}" ;;\n'
48+
' *) echo "" ;;\n'
49+
"esac\n",
50+
encoding="utf-8",
51+
)
52+
try:
53+
askpass_path.chmod(0o700)
54+
except OSError:
55+
# Best-effort on platforms where chmod may be unsupported.
56+
pass
57+
return askpass_path
58+
59+
60+
def _configure_submodule_auth(repo: git.Repo, *, token: str | None, url: str, local_path: str) -> None:
    """Disable interactive prompts and provide an askpass hook for private submodules.

    ``GIT_TERMINAL_PROMPT=0`` is always requested. When a token is supplied and ``url``
    is a GitHub host, an askpass helper is written under ``local_path`` and exposed
    through ``GIT_ASKPASS``; the token itself is passed only via the
    ``GITINGEST_GIT_PASSWORD`` environment variable.

    The whole routine is best-effort: failures are logged and never raised.

    Parameters
    ----------
    repo : git.Repo
        Repository whose Git command environment is updated.
    token : str | None
        Access token used for authentication, or ``None`` for anonymous access.
    url : str
        URL of the repository being cloned; decides whether the token is applied.
    local_path : str
        Local checkout directory in which the askpass helper is written.

    """
    env: dict[str, str] = {"GIT_TERMINAL_PROMPT": "0"}

    if token and is_github_host(url):
        try:
            askpass_path = _write_git_askpass_script(Path(local_path))
        except OSError:
            # Without the helper we still want prompts disabled, so fall through.
            logger.exception("Failed to write GIT_ASKPASS helper", extra={"local_path": local_path})
        else:
            env["GIT_ASKPASS"] = str(askpass_path)
            env["GITINGEST_GIT_PASSWORD"] = token

    try:
        # Single update so prompt suppression and credentials are applied together.
        repo.git.update_environment(**env)
    except Exception:  # noqa: BLE001 (best-effort: the Git object may not support env updates)
        logger.warning(
            "Could not update Git environment for submodule authentication",
            extra={"local_path": local_path},
        )
82+
3083

3184
@async_timeout(DEFAULT_TIMEOUT)
3285
async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
@@ -169,6 +222,7 @@ async def _perform_post_clone_operations(
169222
# Update submodules
170223
if config.include_submodules:
171224
logger.info("Updating submodules")
225+
_configure_submodule_auth(repo, token=token, url=url, local_path=local_path)
172226
repo.git.submodule("update", "--init", "--recursive", "--depth=1")
173227
logger.debug("Submodules updated successfully")
174228
except git.GitCommandError as exc:

src/server/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ class IngestRequest(BaseModel):
2929
----------
3030
input_text : str
3131
The Git repository URL or slug to ingest.
32+
include_submodules : bool
33+
If ``True``, recursively clone and include Git submodules (default: ``False``).
3234
max_file_size : int
3335
Maximum file size slider position (0-500) for filtering files.
3436
pattern_type : PatternType
@@ -41,6 +43,7 @@ class IngestRequest(BaseModel):
4143
"""
4244

4345
input_text: str = Field(..., description="Git repository URL or slug to ingest")
46+
include_submodules: bool = Field(default=False, description="Recursively clone and include Git submodules")
4447
max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description="File size in KB")
4548
pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering")
4649
pattern: str = Field(default="", description="Glob/regex pattern for file filtering")

src/server/query_processor.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ async def _check_s3_cache(
9191
repo_name=cast("str", query.repo_name),
9292
commit=query.commit,
9393
subpath=query.subpath,
94+
include_submodules=query.include_submodules,
9495
include_patterns=query.include_patterns,
9596
ignore_patterns=query.ignore_patterns,
9697
)
@@ -170,6 +171,7 @@ def _store_digest_content(
170171
repo_name=cast("str", query.repo_name),
171172
commit=query.commit,
172173
subpath=query.subpath,
174+
include_submodules=query.include_submodules,
173175
include_patterns=query.include_patterns,
174176
ignore_patterns=query.ignore_patterns,
175177
)
@@ -232,6 +234,7 @@ async def process_query(
232234
pattern_type: PatternType,
233235
pattern: str,
234236
token: str | None = None,
237+
include_submodules: bool = False,
235238
) -> IngestResponse:
236239
"""Process a query by parsing input, cloning a repository, and generating a summary.
237240
@@ -250,6 +253,8 @@ async def process_query(
250253
Pattern to include or exclude in the query, depending on the pattern type.
251254
token : str | None
252255
GitHub personal access token (PAT) for accessing private repositories.
256+
include_submodules : bool
257+
If ``True``, recursively clone and include Git submodules.
253258
254259
Returns
255260
-------
@@ -272,6 +277,7 @@ async def process_query(
272277
return IngestErrorResponse(error=str(exc))
273278

274279
query.url = cast("str", query.url)
280+
query.include_submodules = include_submodules
275281
query.max_file_size = max_file_size * 1024 # Convert to bytes since we currently use KB in higher levels
276282
query.ignore_patterns, query.include_patterns = process_patterns(
277283
exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None,

src/server/routers/ingest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ async def api_ingest(
4242
"""
4343
response = await _perform_ingestion(
4444
input_text=ingest_request.input_text,
45+
include_submodules=ingest_request.include_submodules,
4546
max_file_size=ingest_request.max_file_size,
4647
pattern_type=ingest_request.pattern_type.value,
4748
pattern=ingest_request.pattern,
@@ -58,6 +59,7 @@ async def api_ingest_get(
5859
request: Request, # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument
5960
user: str,
6061
repository: str,
62+
include_submodules: bool = False,
6163
max_file_size: int = DEFAULT_FILE_SIZE_KB,
6264
pattern_type: str = "exclude",
6365
pattern: str = "",
@@ -74,6 +76,7 @@ async def api_ingest_get(
7476
- **repository** (`str`): GitHub repository name
7577
7678
**Query Parameters**
79+
- **include_submodules** (`bool`, optional): Whether to recursively clone and include Git submodules (default: ``False``)
7780
- **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB)
7881
- **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude")
7982
- **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "")
@@ -84,6 +87,7 @@ async def api_ingest_get(
8487
"""
8588
response = await _perform_ingestion(
8689
input_text=f"{user}/{repository}",
90+
include_submodules=include_submodules,
8791
max_file_size=max_file_size,
8892
pattern_type=pattern_type,
8993
pattern=pattern,

src/server/routers_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
async def _perform_ingestion(
2121
input_text: str,
22+
include_submodules: bool,
2223
max_file_size: int,
2324
pattern_type: str,
2425
pattern: str,
@@ -33,6 +34,7 @@ async def _perform_ingestion(
3334

3435
result = await process_query(
3536
input_text=input_text,
37+
include_submodules=include_submodules,
3638
max_file_size=max_file_size,
3739
pattern_type=pattern_type,
3840
pattern=pattern,

src/server/s3_utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def generate_s3_file_path(
6363
repo_name: str,
6464
commit: str,
6565
subpath: str,
66+
include_submodules: bool,
6667
include_patterns: set[str] | None,
6768
ignore_patterns: set[str],
6869
) -> str:
@@ -92,6 +93,8 @@ def generate_s3_file_path(
9293
Set of patterns specifying which files to include.
9394
ignore_patterns : set[str]
9495
Set of patterns specifying which files to exclude.
96+
include_submodules : bool
97+
Whether to recursively clone and include Git submodules.
9598
9699
Returns
97100
-------
@@ -111,7 +114,8 @@ def generate_s3_file_path(
111114
raise ValueError(msg)
112115

113116
# Create hash of exclude/include patterns for uniqueness
114-
patterns_str = f"include:{sorted(include_patterns) if include_patterns else []}"
117+
patterns_str = f"submodules:{int(include_submodules)}"
118+
patterns_str += f"include:{sorted(include_patterns) if include_patterns else []}"
115119
patterns_str += f"exclude:{sorted(ignore_patterns)}"
116120
patterns_hash = hashlib.sha256(patterns_str.encode()).hexdigest()[:16]
117121
subpath_hash = hashlib.sha256(subpath.encode()).hexdigest()[:16]

src/server/templates/components/git_form.jinja

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,23 @@
8585
</div>
8686
<!-- PAT checkbox with PAT field below -->
8787
<div class="flex flex-col items-start w-full sm:col-span-2 lg:col-span-1 lg:row-span-2 lg:pt-3.5">
88+
<!-- Recurse submodules checkbox -->
89+
<div class="flex items-center space-x-2 mb-3">
90+
<label for="include_submodules"
91+
class="flex gap-2 text-gray-900 cursor-pointer">
92+
<div class="relative w-6 h-6">
93+
<input type="checkbox"
94+
id="include_submodules"
95+
name="include_submodules"
96+
{% if include_submodules %}checked{% endif %}
97+
class="cursor-pointer peer appearance-none w-full h-full rounded-sm border-[3px] border-current bg-white m-0 text-current shadow-[3px_3px_0_currentColor]" />
98+
<span class="absolute inset-0 w-3 h-3 m-auto scale-0 transition-transform duration-150 ease-in-out shadow-[inset_1rem_1rem_#FE4A60] bg-[CanvasText] origin-bottom-left peer-checked:scale-100"
99+
style="clip-path: polygon(14% 44%, 0 65%, 50% 100%, 100% 16%, 80% 0%, 43% 62%)"></span>
100+
</div>
101+
Recurse submodules
102+
</label>
103+
<span class="badge-new">NEW</span>
104+
</div>
88105
<!-- PAT checkbox -->
89106
<div class="flex items-center space-x-2">
90107
<label for="showAccessSettings"

src/static/js/utils.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,14 @@ function collectFormData(form) {
126126
const json_data = {};
127127
const inputText = form.querySelector('[name="input_text"]');
128128
const token = form.querySelector('[name="token"]');
129+
const includeSubmodules = form.querySelector('[name="include_submodules"]');
129130
const hiddenInput = document.getElementById('max_file_size_kb');
130131
const patternType = document.getElementById('pattern_type');
131132
const pattern = document.getElementById('pattern');
132133

133134
if (inputText) {json_data.input_text = inputText.value;}
134135
if (token) {json_data.token = token.value;}
136+
if (includeSubmodules) {json_data.include_submodules = includeSubmodules.checked;}
135137
if (hiddenInput) {json_data.max_file_size = hiddenInput.value;}
136138
if (patternType) {json_data.pattern_type = patternType.value;}
137139
if (pattern) {json_data.pattern = pattern.value;}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import pytest
2+
from pytest_mock import MockerFixture
3+
from unittest.mock import AsyncMock
4+
5+
from gitingest.schemas.ingestion import IngestionQuery
6+
from src.server.models import PatternType
7+
8+
9+
@pytest.mark.asyncio
async def test_process_query_forwards_include_submodules(mocker: MockerFixture) -> None:
    """Test that ``process_query`` forwards ``include_submodules`` to ``clone_repo``.

    Given a parsed query and ``include_submodules=True``:
    When ``process_query`` is called with parsing, cloning, ingestion and storage patched out,
    Then ``clone_repo`` is called once with a config whose ``include_submodules`` is ``True``.
    """
    query = IngestionQuery(
        host="github.com",
        user_name="octocat",
        repo_name="Hello-World",
        local_path="/tmp/gitingest/test-include-submodules",
        url="https://github.com/octocat/Hello-World",
        slug="octocat/Hello-World",
        id="00000000-0000-0000-0000-000000000001",
        branch="main",
        commit="deadbeef",
    )

    mocker.patch("src.server.query_processor.parse_remote_repo", new_callable=AsyncMock, return_value=query)
    clone_repo = mocker.patch("src.server.query_processor.clone_repo", new_callable=AsyncMock)
    mocker.patch("src.server.query_processor.ingest_query", return_value=("summary", "tree", "content"))
    mocker.patch("src.server.query_processor._store_digest_content")
    mocker.patch("src.server.query_processor._cleanup_repository")
    mocker.patch("src.server.query_processor._print_success")

    from src.server.query_processor import process_query

    await process_query(
        input_text="https://github.com/octocat/Hello-World",
        max_file_size=243,
        pattern_type=PatternType.EXCLUDE,
        pattern="",
        token=None,
        include_submodules=True,
    )

    clone_repo.assert_called_once()
    # The clone configuration is the first positional argument of ``clone_repo``.
    passed_config = clone_repo.call_args.args[0]
    assert passed_config.include_submodules is True

tests/test_clone.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,27 @@ async def test_clone_with_include_submodules(gitpython_mocks: dict) -> None:
205205
mock_repo.git.submodule.assert_called_with("update", "--init", "--recursive", "--depth=1")
206206

207207

208+
@pytest.mark.asyncio
async def test_clone_with_private_submodules_uses_askpass(tmp_path: Path, gitpython_mocks: dict) -> None:
    """Verify that a token-authenticated submodule clone installs an askpass helper.

    Given ``include_submodules=True`` and a GitHub token:
    When ``clone_repo`` is called,
    Then an askpass script exists under ``.git`` without the token in it,
    and ``GIT_ASKPASS`` is passed to ``update_environment`` on the mocked repo.
    """
    repo_dir = tmp_path / "repo"
    config = CloneConfig(
        url=DEMO_URL,
        local_path=str(repo_dir),
        branch="main",
        include_submodules=True,
    )

    await clone_repo(config, token="token123")  # noqa: S106 (test-only)

    helper = repo_dir / ".git" / "gitingest-askpass.sh"
    assert helper.exists()
    # The secret must never be written into the helper itself.
    helper_text = helper.read_text(encoding="utf-8")
    assert "token123" not in helper_text

    env_calls = gitpython_mocks["repo"].git.update_environment.call_args_list
    assert any("GIT_ASKPASS" in call.kwargs for call in env_calls)
227+
228+
208229
@pytest.mark.asyncio
209230
async def test_check_repo_exists_with_auth_token(mocker: MockerFixture) -> None:
210231
"""Test ``check_repo_exists`` with authentication token.

0 commit comments

Comments
 (0)