Skip to content

Commit 56b06b5

Browse files
authored
fix(core): automatically cleanup dangling git processes (#2928)
1 parent 5cc006c commit 56b06b5

File tree

4 files changed

+133
-64
lines changed

4 files changed

+133
-64
lines changed

renku/command/command_builder/command.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ def __init__(self) -> None:
171171
self._track_std_streams: bool = False
172172
self._working_directory: Optional[str] = None
173173
self._client: Optional["LocalClient"] = None
174+
self._client_was_created: bool = False
174175

175176
def __getattr__(self, name: str) -> Any:
176177
"""Bubble up attributes of wrapped builders."""
@@ -205,6 +206,7 @@ def _injection_pre_hook(self, builder: "Command", context: dict, *args, **kwargs
205206
dispatcher.push_created_client_to_stack(self._client)
206207
else:
207208
self._client = dispatcher.push_client_to_stack(path=default_path(self._working_directory or "."))
209+
self._client_was_created = True
208210
ctx = click.Context(click.Command(builder._operation)) # type: ignore
209211
else:
210212
if not self._client:
@@ -237,6 +239,9 @@ def _post_hook(self, builder: "Command", context: dict, result: "CommandResult",
237239
"""
238240
remove_injector()
239241

242+
if self._client_was_created and self._client and self._client.repository is not None:
243+
self._client.repository.close()
244+
240245
if result.error:
241246
raise result.error
242247

renku/core/management/git.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ def __attrs_post_init__(self):
295295
except errors.GitError:
296296
self.repository = None
297297

298+
def __del__(self):
    """Close the wrapped repository when this object is finalized.

    Does nothing if no repository is set.
    """
    # NOTE: Finalizers also run for objects whose initialization failed part-way, in which case ``repository``
    # may never have been assigned; a plain attribute access would raise ``AttributeError`` here.
    repository = getattr(self, "repository", None)
    if repository is not None:
        repository.close()
301+
298302
@property
299303
def modified_paths(self):
300304
"""Return paths of modified files."""

renku/infrastructure/repository.py

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@ def __init__(self, path: Union[Path, str] = ".", repository: Optional[git.Repo]
8686
def __repr__(self) -> str:
8787
return f"<{self.__class__.__name__} {self.path}>"
8888

89+
def __enter__(self):
90+
return self
91+
92+
def __exit__(self, *args):
93+
self.close()
94+
95+
def __del__(self):
96+
self.close()
97+
8998
@property
9099
def path(self) -> Path:
91100
"""Absolute path to the repository's root."""
@@ -675,6 +684,16 @@ def get_user(self) -> "Actor":
675684
configuration = self.get_configuration()
676685
return Repository._get_user_from_configuration(configuration)
677686

687+
def close(self) -> None:
    """Close the underlying repository.

    Cleans up dangling processes.

    Calling this more than once is a no-op after the first call, and nothing happens if ``_repository`` was
    never assigned.

    Raises:
        Whatever the underlying repository's ``close`` raises; the reference is dropped regardless.
    """
    repository = getattr(self, "_repository", None)
    if repository is None:
        return

    try:
        repository.close()
    finally:
        # NOTE: Drop the reference even when closing fails, otherwise every later call (e.g. from ``__del__``)
        # would retry the same failing close.
        self._repository = None
696+
678697
@staticmethod
679698
def get_global_user() -> "Actor":
680699
"""Return the global git user."""
@@ -795,6 +814,7 @@ def __init__(
795814
self, path: Union[Path, str] = ".", search_parent_directories: bool = False, repository: git.Repo = None
796815
):
797816
repo = repository or _create_repository(path, search_parent_directories)
817+
798818
super().__init__(path=Path(repo.working_dir).resolve(), repository=repo) # type: ignore
799819

800820
@classmethod
@@ -864,7 +884,7 @@ def __init__(self, parent: git.Repo, name: str, path: Union[Path, str], url: str
864884
self._name: str = name
865885
self._url: str = url
866886
try:
867-
self._repository: git.Repo = _create_repository(path, search_parent_directories=False)
887+
self._repository: Optional[git.Repo] = _create_repository(path, search_parent_directories=False)
868888
except errors.GitError:
869889
# NOTE: Submodule directory doesn't exist yet, so, we ignore the error
870890
pass
@@ -881,6 +901,12 @@ def __str__(self) -> str:
881901
def __repr__(self) -> str:
882902
return f"<Submodule {self.relative_path}>"
883903

904+
def __del__(self) -> None:
905+
if getattr(self, "_repository", None) is not None:
906+
self._repository.close() # type:ignore
907+
del self._repository
908+
self._repository = None
909+
884910
@property
885911
def name(self) -> str:
886912
"""Return submodule's name."""
@@ -901,42 +927,75 @@ class SubmoduleManager:
901927
"""Manage submodules of a Repository."""
902928

903929
def __init__(self, repository: git.Repo):
904-
self._repository = repository
930+
self._repository: Optional[git.Repo] = repository
931+
self._submodule_cache: Dict[str, Submodule] = {} # type: ignore
905932
try:
906933
self.update()
907934
except errors.GitError:
908935
# NOTE: Update fails if submodule repo cannot be cloned. Repository still works but submodules are broken.
909936
pass
910937

938+
def _get_submodule(self, submodule: git.Submodule) -> Submodule: # type: ignore
939+
"""Get a submodule from local cache."""
940+
if self._repository is None:
941+
raise errors.ParameterError("Repository not set.")
942+
943+
if submodule.name not in self._submodule_cache:
944+
submodule_result = Submodule.from_submodule(self._repository, submodule)
945+
self._submodule_cache[submodule.name] = submodule_result
946+
return self._submodule_cache[submodule.name]
947+
911948
def __getitem__(self, name: str) -> Submodule:
    """Return the submodule called ``name``.

    Raises:
        errors.ParameterError: If no repository is set.
        errors.GitError: If the repository has no submodule with that name.
    """
    if self._repository is None:
        raise errors.ParameterError("Repository not set.")

    try:
        submodule = self._repository.submodules[name]
    except IndexError as e:
        # NOTE: Chain the lookup failure explicitly, the same way ``remove`` chains its git error
        raise errors.GitError(f"Submodule '{name}' not found") from e

    return self._get_submodule(submodule)
918958

919959
def __iter__(self):
920-
return (Submodule.from_submodule(self._repository, s) for s in self._repository.submodules)
960+
if self._repository is None:
961+
raise errors.ParameterError("Repository not set.")
962+
963+
for s in self._repository.submodules:
964+
965+
yield self._get_submodule(s)
921966

922967
def __len__(self) -> int:
968+
if self._repository is None:
969+
raise errors.ParameterError("Repository not set.")
970+
923971
return len(self._repository.submodules)
924972

925973
def __repr__(self) -> str:
926974
return str(list(self))
927975

928976
def remove(self, submodule: Union[Submodule, str], force: bool = False):
    """Remove an existing submodule.

    Args:
        submodule: The submodule to remove, or its name.
        force: Forwarded to the git submodule's ``remove``.

    Raises:
        errors.ParameterError: If no repository is set.
        errors.GitError: If git fails while removing the submodule.
    """
    if self._repository is None:
        raise errors.ParameterError("Repository not set.")

    name = submodule if isinstance(submodule, str) else submodule.name

    try:
        self._repository.submodules[name].remove(force=force)

        # NOTE: Evict the cached wrapper (if any) and close it so it doesn't keep its repository open
        cached_submodule = self._submodule_cache.pop(name, None)
        if cached_submodule is not None:
            cached_submodule.close()
    except git.GitError as e:
        # NOTE: Report the name rather than the ``submodule`` argument, so the message does not depend on
        # whether a name or an object was passed in.
        raise errors.GitError(f"Cannot delete submodule '{name}'") from e
937993

938994
def update(self, initialize: bool = True):
939995
"""Update all submodule."""
996+
if self._repository is None:
997+
raise errors.ParameterError("Repository not set.")
998+
940999
# NOTE: Git complains if ``--init`` comes before ``update``
9411000
args = ("update", "--init") if initialize else ("update",)
9421001
_run_git_command(self._repository, "submodule", *args)

renku/ui/service/controllers/api/mixins.py

Lines changed: 59 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -190,37 +190,37 @@ def execute_op(self):
190190
ref = self.request_data.get("ref", None)
191191

192192
if ref:
193-
repository = Repository(project.abs_path)
194-
if ref != repository.active_branch.name:
195-
# NOTE: Command called for different branch than the one used in cache, change branch
196-
if len(repository.remotes) != 1:
197-
raise RenkuException("Couldn't find remote for project in cache.")
198-
origin = repository.remotes[0]
199-
remote_branch = f"{origin}/{ref}"
200-
201-
with project.write_lock():
202-
# NOTE: Add new ref to remote branches
203-
repository.run_git_command("remote", "set-branches", "--add", origin, ref)
204-
if self.migrate_project or self.clone_depth == PROJECT_CLONE_NO_DEPTH:
205-
repository.fetch(origin, ref)
206-
else:
207-
repository.fetch(origin, ref, depth=self.clone_depth)
208-
209-
# NOTE: Switch to new ref
210-
repository.run_git_command("checkout", "--track", "-f", "-b", ref, remote_branch)
211-
212-
# NOTE: cleanup remote branches in case a remote was deleted (fetch fails otherwise)
213-
repository.run_git_command("remote", "prune", origin)
214-
215-
for branch in repository.branches:
216-
if branch.remote_branch and not branch.remote_branch.is_valid():
217-
repository.branches.remove(branch, force=True)
218-
# NOTE: Remove left-over refspec
219-
try:
220-
with repository.get_configuration(writable=True) as config:
221-
config.remove_value(f"remote.{origin}.fetch", f"origin.{branch}$")
222-
except GitConfigurationError:
223-
pass
193+
with Repository(project.abs_path) as repository:
194+
if ref != repository.active_branch.name:
195+
# NOTE: Command called for different branch than the one used in cache, change branch
196+
if len(repository.remotes) != 1:
197+
raise RenkuException("Couldn't find remote for project in cache.")
198+
origin = repository.remotes[0]
199+
remote_branch = f"{origin}/{ref}"
200+
201+
with project.write_lock():
202+
# NOTE: Add new ref to remote branches
203+
repository.run_git_command("remote", "set-branches", "--add", origin, ref)
204+
if self.migrate_project or self.clone_depth == PROJECT_CLONE_NO_DEPTH:
205+
repository.fetch(origin, ref)
206+
else:
207+
repository.fetch(origin, ref, depth=self.clone_depth)
208+
209+
# NOTE: Switch to new ref
210+
repository.run_git_command("checkout", "--track", "-f", "-b", ref, remote_branch)
211+
212+
# NOTE: cleanup remote branches in case a remote was deleted (fetch fails otherwise)
213+
repository.run_git_command("remote", "prune", origin)
214+
215+
for branch in repository.branches:
216+
if branch.remote_branch and not branch.remote_branch.is_valid():
217+
repository.branches.remove(branch, force=True)
218+
# NOTE: Remove left-over refspec
219+
try:
220+
with repository.get_configuration(writable=True) as config:
221+
config.remove_value(f"remote.{origin}.fetch", f"origin.{branch}$")
222+
except GitConfigurationError:
223+
pass
224224
else:
225225
self.reset_local_repo(project)
226226

@@ -250,33 +250,33 @@ def reset_local_repo(self, project):
250250
# NOTE: return immediately in case of multiple writers waiting
251251
return
252252

253-
repository = Repository(project.abs_path)
254-
origin = None
255-
tracking_branch = repository.active_branch.remote_branch
256-
if tracking_branch:
257-
origin = tracking_branch.remote
258-
elif len(repository.remotes) == 1:
259-
origin = repository.remotes[0]
260-
261-
if origin:
262-
unshallow = self.migrate_project or self.clone_depth == PROJECT_CLONE_NO_DEPTH
263-
if unshallow:
264-
try:
265-
# NOTE: It could happen that repository is already un-shallowed,
266-
# in this case we don't want to leak git exception, but still want to fetch.
267-
repository.fetch("origin", repository.active_branch, unshallow=True)
268-
except GitCommandError:
269-
repository.fetch("origin", repository.active_branch)
270-
271-
repository.reset(f"{origin}/{repository.active_branch}", hard=True)
272-
else:
273-
try:
274-
# NOTE: it rarely happens that origin is not reachable. Try again if it fails.
275-
repository.fetch("origin", repository.active_branch)
253+
with Repository(project.abs_path) as repository:
254+
origin = None
255+
tracking_branch = repository.active_branch.remote_branch
256+
if tracking_branch:
257+
origin = tracking_branch.remote
258+
elif len(repository.remotes) == 1:
259+
origin = repository.remotes[0]
260+
261+
if origin:
262+
unshallow = self.migrate_project or self.clone_depth == PROJECT_CLONE_NO_DEPTH
263+
if unshallow:
264+
try:
265+
# NOTE: It could happen that repository is already un-shallowed,
266+
# in this case we don't want to leak git exception, but still want to fetch.
267+
repository.fetch("origin", repository.active_branch, unshallow=True)
268+
except GitCommandError:
269+
repository.fetch("origin", repository.active_branch)
270+
276271
repository.reset(f"{origin}/{repository.active_branch}", hard=True)
277-
except GitCommandError as e:
278-
project.purge()
279-
raise IntermittentCacheError(e)
272+
else:
273+
try:
274+
# NOTE: it rarely happens that origin is not reachable. Try again if it fails.
275+
repository.fetch("origin", repository.active_branch)
276+
repository.reset(f"{origin}/{repository.active_branch}", hard=True)
277+
except GitCommandError as e:
278+
project.purge()
279+
raise IntermittentCacheError(e)
280280
project.last_fetched_at = datetime.utcnow()
281281
project.save()
282282
except (portalocker.LockException, portalocker.AlreadyLocked) as e:
@@ -346,7 +346,8 @@ def sync(self, remote="origin"):
346346
if self.project_path is None:
347347
raise RenkuException("unable to sync with remote since no operation has been executed")
348348

349-
return push_changes(Repository(self.project_path), remote=remote)
349+
with Repository(self.project_path) as repository:
350+
return push_changes(repository, remote=remote)
350351

351352
def execute_and_sync(self, remote="origin"):
352353
"""Execute operation which controller implements and sync with the remote."""

0 commit comments

Comments
 (0)