Skip to content

Commit 01a0798

Browse files
authored
feat(service): accept commit SHA in read endpoints (#3608)
1 parent 5df0ce1 commit 01a0798

File tree

14 files changed

+152
-37
lines changed

14 files changed

+152
-37
lines changed

renku/core/util/git.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast
3030
from uuid import uuid4
3131

32+
import git
33+
3234
from renku.core import errors
3335
from renku.infrastructure.repository import DiffChangeType
3436

@@ -712,7 +714,7 @@ def clone_repository(
712714
skip_smudge=True,
713715
recursive=False,
714716
depth=None,
715-
progress=None,
717+
progress: Optional[git.RemoteProgress] = None,
716718
config: Optional[dict] = None,
717719
raise_git_except=False,
718720
checkout_revision=None,
@@ -746,10 +748,8 @@ def clone_repository(
746748

747749
path = Path(path) if path else Path(get_repository_name(url))
748750

749-
def handle_git_exception():
750-
"""Handle git exceptions."""
751-
if raise_git_except:
752-
return
751+
def error_from_progress(progress: Optional[git.RemoteProgress], url: str) -> errors.GitError:
752+
"""Format a Git command error into a more user-friendly format."""
753753

754754
message = f"Cannot clone repo from {url}"
755755

@@ -758,7 +758,7 @@ def handle_git_exception():
758758
error = "".join([f"\n\t{line}" for line in lines if line.strip()])
759759
message += f" - error message:\n {error}"
760760

761-
raise errors.GitError(message)
761+
return errors.GitError(message)
762762

763763
def clean_directory(clean: bool):
764764
if not clean or not path:
@@ -825,8 +825,9 @@ def clone(branch, depth):
825825
repository = clone(branch=checkout_revision, depth=depth)
826826
except errors.GitCommandError:
827827
if not checkout_revision:
828-
handle_git_exception()
829-
raise
828+
if raise_git_except:
829+
raise
830+
raise error_from_progress(progress, url)
830831

831832
# NOTE: Delete the partially-cloned repository
832833
clean_directory(clean=True)
@@ -835,8 +836,9 @@ def clone(branch, depth):
835836
try:
836837
repository = clone(branch=None, depth=None)
837838
except errors.GitCommandError:
838-
handle_git_exception()
839-
raise
839+
if raise_git_except:
840+
raise
841+
raise error_from_progress(progress, url)
840842

841843
if checkout_revision is not None and not no_checkout:
842844
try:

renku/infrastructure/repository.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from pathlib import Path
3131
from typing import (
3232
Any,
33-
Callable,
3433
Dict,
3534
Generator,
3635
List,
@@ -1001,20 +1000,24 @@ def clone_from(
10011000
branch: Optional[str] = None,
10021001
recursive: bool = False,
10031002
depth: Optional[int] = None,
1004-
progress: Optional[Callable] = None,
1003+
progress: Optional[git.RemoteProgress] = None,
10051004
no_checkout: bool = False,
10061005
env: Optional[dict] = None,
10071006
clone_options: Optional[List[str]] = None,
10081007
) -> "Repository":
1009-
"""Clone a remote repository and create an instance."""
1008+
"""Clone a remote repository and create an instance.
1009+
1010+
Since this is just a thin wrapper around GitPython, note that the branch parameter
1011+
accepts either a branch name or a tag, but it does not work with a commit SHA.
1012+
"""
10101013
try:
10111014
repository = git.Repo.clone_from(
10121015
url=url,
10131016
to_path=path,
1014-
branch=branch,
1017+
branch=branch, # NOTE: GitPython accepts a tag or a branch here, but not a commit SHA
10151018
recursive=recursive,
10161019
depth=depth,
1017-
progress=progress,
1020+
progress=progress, # type: ignore[arg-type]
10181021
no_checkout=no_checkout,
10191022
env=env,
10201023
multi_options=clone_options,

renku/ui/service/cache/models/project.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@
2626

2727
from renku.ui.service.cache.base import BaseCache
2828
from renku.ui.service.config import CACHE_PROJECTS_PATH
29-
from renku.ui.service.utils import normalize_git_url
3029

3130
MAX_CONCURRENT_PROJECT_REQUESTS = 10
3231
LOCK_TIMEOUT = 15
3332
NO_BRANCH_FOLDER = "__default_branch__"
33+
DETACHED_HEAD_FOLDER_PREFIX = "__detached_head_"
3434

3535

3636
class Project(Model):
@@ -55,14 +55,21 @@ class Project(Model):
5555
description = TextField()
5656
owner = TextField()
5757
initialized = BooleanField()
58+
commit_sha = TextField()
59+
branch = TextField()
5860

5961
@property
6062
def abs_path(self) -> Path:
6163
"""Full path of cached project."""
62-
branch = self.branch
64+
folder_name = self.branch
6365
if not self.branch:
64-
branch = NO_BRANCH_FOLDER
65-
return CACHE_PROJECTS_PATH / self.user_id / self.owner / normalize_git_url(self.slug) / branch
66+
if self.commit_sha:
67+
# NOTE: Detached head state
68+
folder_name = f"{DETACHED_HEAD_FOLDER_PREFIX}{self.commit_sha}"
69+
else:
70+
# NOTE: We are on the default branch (i.e. main)
71+
folder_name = NO_BRANCH_FOLDER
72+
return CACHE_PROJECTS_PATH / self.user_id / self.owner / self.slug / folder_name
6673

6774
def read_lock(self, timeout: Optional[float] = None):
6875
"""Shared read lock on the project."""

renku/ui/service/cache/serializers/project.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import uuid
1818
from datetime import datetime
1919

20-
from marshmallow import fields, post_load
20+
from marshmallow import ValidationError, fields, post_load, validates_schema
2121

2222
from renku.ui.service.cache.models.project import Project
2323
from renku.ui.service.serializers.common import AccessSchema, CreationSchema, MandatoryUserSchema
@@ -39,11 +39,20 @@ class ProjectSchema(CreationSchema, AccessSchema, MandatoryUserSchema):
3939
description = fields.String(load_default=None)
4040
owner = fields.String(required=True)
4141
initialized = fields.Boolean(dump_default=False)
42+
commit_sha = fields.String(required=False, load_default=None, dump_default=None)
43+
branch = fields.String(required=False, load_default=None, dump_default=None)
4244

4345
@post_load
4446
def make_project(self, data, **options):
4547
"""Construct project object."""
46-
data["git_url"] = normalize_git_url(data["git_url"])
48+
if data.get("git_url"):
49+
data["git_url"] = normalize_git_url(data["git_url"])
4750
data["name"] = normalize_git_url(data["name"])
4851
data["slug"] = normalize_git_url(data["slug"])
4952
return Project(**data)
53+
54+
@validates_schema
55+
def ensure_only_commit_sha_or_branch(self, data, **kwargs):
56+
"""Checks that only a commit SHA or branch is set and not both."""
57+
if data.get("commit_sha") and data.get("branch"):
58+
raise ValidationError("You cannot specify a commit SHA and a branch, only one or the other")

renku/ui/service/controllers/api/mixins.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ def local(self):
188188
self.request_data.get("branch"),
189189
self.user,
190190
self.clone_depth is not None,
191+
self.request_data.get("commit_sha"),
191192
)
192193

193194
self.context["project_id"] = project.project_id

renku/ui/service/gateways/repository_cache.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,13 @@ class LocalRepositoryCache(IRepositoryCache):
4545
"""Cache for project repos stored on local disk."""
4646

4747
def get(
48-
self, cache: ServiceCache, git_url: str, branch: Optional[str], user: User, shallow: bool = True
48+
self,
49+
cache: ServiceCache,
50+
git_url: str,
51+
branch: Optional[str],
52+
user: User,
53+
shallow: bool = True,
54+
commit_sha: Optional[str] = None,
4955
) -> Project:
5056
"""Get a project from cache (clone if necessary)."""
5157
if git_url is None:
@@ -58,12 +64,12 @@ def get(
5864
)
5965
except ValueError:
6066
# project not found in DB
61-
return self._clone_project(cache, git_url, branch, user, shallow)
67+
return self._clone_project(cache, git_url, branch, user, shallow, commit_sha)
6268

6369
if not project.abs_path.exists():
6470
# cache folder doesn't exist anymore
6571
project.delete()
66-
return self._clone_project(cache, git_url, branch, user, shallow)
72+
return self._clone_project(cache, git_url, branch, user, shallow, commit_sha)
6773

6874
if not shallow and project.is_shallow:
6975
self._unshallow_project(project, user)
@@ -100,7 +106,13 @@ def _update_project_access_date(self, project: Project):
100106
project.save()
101107

102108
def _clone_project(
103-
self, cache: ServiceCache, git_url: str, branch: Optional[str], user: User, shallow: bool = True
109+
self,
110+
cache: ServiceCache,
111+
git_url: str,
112+
branch: Optional[str],
113+
user: User,
114+
shallow: bool = True,
115+
commit_sha: Optional[str] = None,
104116
) -> Project:
105117
"""Clone a project to cache."""
106118
git_url = normalize_git_url(git_url)
@@ -124,6 +136,7 @@ def _clone_project(
124136
"branch": branch,
125137
"git_url": git_url,
126138
"user_id": user.user_id,
139+
"commit_sha": commit_sha,
127140
}
128141
project = cache.make_project(user, project_data, persist=False)
129142

@@ -139,6 +152,7 @@ def _clone_project(
139152
(Project.user_id == user.user_id)
140153
& (Project.git_url == git_url)
141154
& (Project.branch == branch)
155+
& (Project.commit_sha == commit_sha)
142156
& (Project.project_id != project.project_id)
143157
)
144158
except ValueError:
@@ -170,7 +184,7 @@ def _clone_project(
170184
"user.email": user.email,
171185
"pull.rebase": False,
172186
},
173-
checkout_revision=project.branch,
187+
checkout_revision=commit_sha or project.branch,
174188
)
175189
).output
176190
project.save()
@@ -186,6 +200,9 @@ def _clone_project(
186200

187201
def _unshallow_project(self, project: Project, user: User):
188202
"""Turn a shallow clone into a full clone."""
203+
if project.commit_sha is not None:
204+
# NOTE: It does not make sense to unshallow a project that is in a detached HEAD state at a specific commit SHA
205+
return
189206
try:
190207
with project.write_lock(), Repository(project.abs_path) as repository:
191208
try:
@@ -208,6 +225,10 @@ def _maybe_update_cache(self, project: Project, user: User):
208225
if project.fetch_age < PROJECT_FETCH_TIME:
209226
return
210227

228+
if project.commit_sha is not None:
229+
# NOTE: A project in a detached head state at a specific commit SHA cannot be updated
230+
return
231+
211232
try:
212233
with project.write_lock(), Repository(project.abs_path) as repository:
213234
try:

renku/ui/service/serializers/common.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ def set_branch_from_ref(self, data, **_):
6767
return data
6868

6969

70+
class GitCommitSHA:
71+
"""Schema for a commit SHA."""
72+
73+
commit_sha = fields.String(load_default=None, metadata={"description": "Git commit SHA."})
74+
75+
7076
class AsyncSchema(Schema):
7177
"""Schema for adding a commit at the end of the operation."""
7278

renku/ui/service/serializers/config.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,17 @@
1717

1818
from marshmallow import Schema, fields
1919

20-
from renku.ui.service.serializers.common import AsyncSchema, MigrateSchema, RemoteRepositorySchema, RenkuSyncSchema
20+
from renku.ui.service.serializers.common import (
21+
AsyncSchema,
22+
GitCommitSHA,
23+
MigrateSchema,
24+
RemoteRepositorySchema,
25+
RenkuSyncSchema,
26+
)
2127
from renku.ui.service.serializers.rpc import JsonRPCResponse
2228

2329

24-
class ConfigShowRequest(RemoteRepositorySchema):
30+
class ConfigShowRequest(RemoteRepositorySchema, GitCommitSHA):
2531
"""Request schema for config show."""
2632

2733

renku/ui/service/serializers/datasets.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from renku.domain_model.dataset import ImageObjectRequestJson
2323
from renku.ui.service.serializers.common import (
2424
AsyncSchema,
25+
GitCommitSHA,
2526
JobDetailsResponse,
2627
MigrateSchema,
2728
RemoteRepositorySchema,
@@ -120,7 +121,7 @@ class DatasetAddResponseRPC(JsonRPCResponse):
120121
result = fields.Nested(DatasetAddResponse)
121122

122123

123-
class DatasetListRequest(RemoteRepositorySchema):
124+
class DatasetListRequest(RemoteRepositorySchema, GitCommitSHA):
124125
"""Request schema for dataset list view."""
125126

126127

@@ -142,7 +143,7 @@ class DatasetListResponseRPC(JsonRPCResponse):
142143
result = fields.Nested(DatasetListResponse)
143144

144145

145-
class DatasetFilesListRequest(DatasetSlugSchema, RemoteRepositorySchema):
146+
class DatasetFilesListRequest(DatasetSlugSchema, RemoteRepositorySchema, GitCommitSHA):
146147
"""Request schema for dataset files list view."""
147148

148149

renku/ui/service/serializers/graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
"""Renku graph serializers."""
1717
from marshmallow import Schema, fields, validate
1818

19-
from renku.ui.service.serializers.common import AsyncSchema, MigrateSchema, RemoteRepositorySchema
19+
from renku.ui.service.serializers.common import AsyncSchema, GitCommitSHA, MigrateSchema, RemoteRepositorySchema
2020
from renku.ui.service.serializers.rpc import JsonRPCResponse
2121

2222

23-
class GraphExportRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema):
23+
class GraphExportRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema, GitCommitSHA):
2424
"""Request schema for graph export."""
2525

2626
callback_url = fields.URL()

0 commit comments

Comments
 (0)