@@ -111,9 +111,9 @@ def check_if_branch_exists(
111111 cwd = repo_name ,
112112 capture_output = True ,
113113 shell = True ,
114- text = True
114+ text = True ,
115115 )
116-
116+
117117 # If there is output, the branch exists on remote
118118 if result .returncode == 0 and subfolder in result .stdout :
119119 branch_exists = True
@@ -127,8 +127,7 @@ def check_if_branch_exists(
127127 if verbose :
128128 print (f"[{ subfolder } ] Overriding existing branch" )
129129 branch_exists = False
130-
131-
130+
132131 except Exception :
133132 branch_exists = False
134133 pass
@@ -202,7 +201,7 @@ def _main(
202201 # We must clone into a subdirectory which doesn't exist yet.
203202 cache_dir = os .path .join (cache_root , "repo" )
204203 print (f"Pre-cloning repository to cache: { cache_dir } ..." )
205-
204+
206205 rp_cache = None
207206 # Try resolving profile from run_id (directory name) first
208207 try :
@@ -217,7 +216,7 @@ def _main(
217216 rp_cache = registry .get_from_inst ({KEY_INSTANCE_ID : sample_id })
218217 except Exception as e :
219218 print (f"Warning: Could not resolve profile from { sample_id } : { e } " )
220-
219+
221220 path_to_cache = None
222221 if rp_cache :
223222 try :
@@ -228,7 +227,9 @@ def _main(
228227 except Exception as e :
229228 print (f"Pre-clone failed: { e } . Will fall back to per-instance cloning." )
230229 else :
231- print ("Could not resolve profile for pre-cloning. Will iterate per instance." )
230+ print (
231+ "Could not resolve profile for pre-cloning. Will iterate per instance."
232+ )
232233
233234 with concurrent .futures .ProcessPoolExecutor (max_workers = n_workers ) as executor :
234235 # Create a partial function with fixed arguments
@@ -352,19 +353,17 @@ def process_instance(
352353 # We place it in the same temporary directory as the cache to ensure automatic cleanup.
353354 if cache_dir :
354355 # cache_dir is .../temp/repo, so dirname is .../temp
355- repo_path = os .path .join (os .path .dirname (cache_dir ), f"{ rp .repo_name } _worker_{ pid } " )
356+ repo_path = os .path .join (
357+ os .path .dirname (cache_dir ), f"{ rp .repo_name } _worker_{ pid } "
358+ )
356359 else :
357360 # Fallback if no cache used (e.g. debugging), though likely not cleaned up automatically
358361 repo_path = os .path .abspath (f"{ rp .repo_name } _worker_{ pid } " )
359362
360363 # Helper to reset repo state
361364 def reset_repo (path ):
362- subprocess .run (
363- "git reset --hard" , cwd = path , ** subprocess_args
364- )
365- subprocess .run (
366- "git clean -fdx" , cwd = path , ** subprocess_args
367- )
365+ subprocess .run ("git reset --hard" , cwd = path , ** subprocess_args )
366+ subprocess .run ("git clean -fdx" , cwd = path , ** subprocess_args )
368367 # remove potential lock files if previous run crashed hard
369368 lock_file = os .path .join (path , ".git" , "index.lock" )
370369 if os .path .exists (lock_file ):
@@ -380,10 +379,10 @@ def reset_repo(path):
380379 if verbose :
381380 print (f"[{ subfolder } ] Reusing worker repo { repo_path } " )
382381 reset_repo (repo_path )
383-
382+
384383 # We need to know main branch name. We can get it from local repo now.
385384 # Assuming main branch hasn't changed name/ref significantly.
386- # We avoid 'git pull' to save rate limits and time.
385+ # We avoid 'git pull' to save rate limits and time.
387386 main_branch = (
388387 subprocess .run (
389388 "git rev-parse --abbrev-ref HEAD" ,
@@ -396,14 +395,16 @@ def reset_repo(path):
396395 .strip ()
397396 )
398397 # Ensure we are on main branch
399- subprocess .run (f"git checkout { main_branch } " , cwd = repo_path , ** subprocess_args )
398+ subprocess .run (
399+ f"git checkout { main_branch } " , cwd = repo_path , ** subprocess_args
400+ )
400401
401402 else :
402403 # First time setup for this worker
403404 if cache_dir and os .path .exists (cache_dir ):
404405 if verbose :
405406 print (f"[{ subfolder } ] First-time clone from cache { cache_dir } ..." )
406-
407+
407408 subprocess .run (
408409 f"git clone { cache_dir } { repo_path } " ,
409410 check = True ,
@@ -443,7 +444,6 @@ def reset_repo(path):
443444 # Ensure we are clean on main branch before starting
444445 subprocess .run (f"git checkout { main_branch } " , cwd = repo_path , ** subprocess_args )
445446
446-
447447 # Check if branch already created for this problem
448448 branch_exists = check_if_branch_exists (
449449 repo_path , subfolder , main_branch , override_branch , verbose , subprocess_args
@@ -453,7 +453,7 @@ def reset_repo(path):
453453 if verbose :
454454 print (f"[SKIP] { subfolder } : Branch `{ subfolder } ` exists" )
455455 stats ["skipped" ] += 1
456- # Do NOT remove repo, just return.
456+ # Do NOT remove repo, just return.
457457 # We might want to checkout main to be polite to next run but reset_repo handles it.
458458 return task_instances , created_repos , stats
459459
@@ -514,10 +514,14 @@ def reset_repo(path):
514514 if verbose :
515515 print (f"[{ subfolder } ] No changes to commit, skipping" )
516516 stats ["skipped" ] += 1
517- # Reset logic happens at start of next or via finally...
517+ # Reset logic happens at start of next or via finally...
518518 # actually better to cleanup branch now
519- subprocess .run (f"git checkout { main_branch } " , cwd = repo_path , ** subprocess_args )
520- subprocess .run (f"git branch -D { subfolder } " , cwd = repo_path , ** subprocess_args )
519+ subprocess .run (
520+ f"git checkout { main_branch } " , cwd = repo_path , ** subprocess_args
521+ )
522+ subprocess .run (
523+ f"git branch -D { subfolder } " , cwd = repo_path , ** subprocess_args
524+ )
521525 return task_instances , created_repos , stats
522526
523527 cmds = [
@@ -571,7 +575,7 @@ def reset_repo(path):
571575 finally :
572576 # DO NOT remove repo_path. We persist it for this worker logic.
573577 pass
574-
578+
575579 return task_instances , created_repos , stats
576580
577581
0 commit comments