22import os
33import shutil
44import subprocess
5+ import time
56from pathlib import Path
67
78logger = logging .getLogger ()
89
910
def cleanup_old_sessions(base_path: Path, max_age_hours: int = 24) -> None:
    """Remove stale session directories found directly under ``base_path``.

    Only direct children whose name starts with ``editing-`` or ``session-``
    are considered. A directory is stale when its own modification time is
    more than ``max_age_hours`` hours in the past.

    The cleanup is best effort: every failure is logged as a warning and
    nothing is raised to the caller.

    Args:
        base_path: Directory that holds the per-session directories. A
            missing directory is a no-op.
        max_age_hours: Age threshold in hours.
    """
    session_prefixes = ("editing-", "session-")
    try:
        if not base_path.exists():
            return

        cutoff_time = time.time() - (max_age_hours * 3600)

        for item in base_path.iterdir():
            if not item.name.startswith(session_prefixes):
                continue
            # is_dir() follows symlinks, but shutil.rmtree() refuses to
            # operate on a symlink; skip links instead of failing (and
            # warning) on every single call.
            if item.is_symlink() or not item.is_dir():
                continue
            try:
                if item.stat().st_mtime < cutoff_time:
                    logger.info("Removing old session directory: %s", item.name)
                    shutil.rmtree(item)
            except Exception as e:
                # One undeletable directory must not stop the sweep.
                logger.warning("Failed to remove old session %s: %s", item.name, e)
    except Exception as e:
        # Cleanup must never break the operation that triggered it.
        logger.warning("Failed to cleanup old sessions: %s", e)
29+
30+
1031def clone_repo (repository : str , session_id : str , session_type : str = "session" ) -> str :
11- """Clone a GitHub repository into /tmp .
32+ """Clone a GitHub repository into EFS storage .
1233
1334 Args:
1435 repository: GitHub repository in format 'owner/repo'
@@ -22,36 +43,75 @@ def clone_repo(repository: str, session_id: str, session_type: str = "session")
2243 if not github_token :
2344 raise RuntimeError ("GITHUB_TOKEN environment variable not set" )
2445
25- repo_path = Path ("/tmp" ) / f"{ session_type } -{ session_id } " / repository
26- repo_path .parent .mkdir (parents = True , exist_ok = True )
46+ base_path = Path (os .environ .get ("HOME" , "/mnt/efs" )) / "repos"
47+ cache_path = base_path / repository .replace ("/" , "_" )
48+ session_path = base_path / f"{ session_type } -{ session_id } " / repository
49+
50+ base_path .mkdir (parents = True , exist_ok = True )
51+ session_path .parent .mkdir (parents = True , exist_ok = True )
52+
53+ cleanup_old_sessions (base_path )
54+
55+ clone_url = f"https://x-access-token:{ github_token } @github.com/{ repository } .git"
56+
57+ if cache_path .exists () and (cache_path / ".git" ).exists ():
58+ logger .info (f"Found cached repo at { cache_path } , copying to { session_path } " )
59+ try :
60+ if session_path .exists ():
61+ shutil .rmtree (session_path )
62+ shutil .copytree (cache_path , session_path )
63+
64+ subprocess .run (
65+ ["git" , "fetch" , "origin" ],
66+ cwd = str (session_path ),
67+ check = True ,
68+ capture_output = True ,
69+ text = True ,
70+ )
71+ subprocess .run (
72+ ["git" , "reset" , "--hard" , "origin/HEAD" ],
73+ cwd = str (session_path ),
74+ check = True ,
75+ capture_output = True ,
76+ text = True ,
77+ )
78+ logger .info (f"Successfully reused cached repo and updated to latest" )
79+ except Exception as e :
80+ logger .warning (f"Failed to reuse cache: { e } , will clone fresh" )
81+ if session_path .exists ():
82+ shutil .rmtree (session_path )
83+ _clone_fresh (clone_url , session_path , repository )
84+ else :
85+ logger .info (f"No cache found, cloning fresh into { session_path } " )
86+ _clone_fresh (clone_url , session_path , repository )
2787
28- if repo_path .exists ():
29- logger .info (f"Directory { repo_path } already exists, removing it (Lambda container reuse)" )
3088 try :
31- shutil .rmtree (repo_path )
32- logger .info (f"Successfully removed existing directory at { repo_path } " )
89+ if cache_path .exists ():
90+ shutil .rmtree (cache_path )
91+ shutil .copytree (session_path , cache_path )
92+ logger .info (f"Cached repo for future use at { cache_path } " )
3393 except Exception as e :
34- logger .error (f"Failed to remove existing directory at { repo_path } : { e } " )
35- raise RuntimeError (f"Failed to clean up existing directory: { e } " )
94+ logger .warning (f"Failed to create cache: { e } , continuing without cache" )
3695
37- clone_url = f"https://x-access-token:{ github_token } @github.com/{ repository } .git"
96+ configure_git_auth (str (session_path ))
97+
98+ return str (session_path )
3899
39- logger .info (f"Cloning { repository } into { repo_path } " )
100+
101+ def _clone_fresh (clone_url : str , repo_path : Path , repository : str ) -> None :
102+ """Helper to clone a repository from scratch."""
40103 try :
41104 subprocess .run (
42105 ["git" , "clone" , clone_url , str (repo_path )],
43106 check = True ,
44107 capture_output = True ,
45108 text = True ,
46109 )
110+ logger .info (f"Successfully cloned { repository } " )
47111 except subprocess .CalledProcessError as e :
48112 logger .error (f"Failed to clone repository: { e .stderr } " )
49113 raise RuntimeError (f"Failed to clone { repository } : { e .stderr } " )
50114
51- configure_git_auth (str (repo_path ))
52-
53- return str (repo_path )
54-
55115
56116def configure_git_auth (repo_path : str ) -> None :
57117 """Configure git user for the repository."""
0 commit comments