Skip to content

Commit 615dafa

Browse files
authored
Clone solacc repos 1 by 1 and delete them once linting is done (#2753)
## Changes

The current implementation clones all `solacc` repos at once, occupying 1.5 GB, which brings us close to the limits in CI. This PR fixes that by cloning the repos one by one and deleting each of them once linting is done.

### Linked issues

None

### Functionality

None

### Tests

- [x] manually tested

---------

Co-authored-by: Eric Vergnaud <[email protected]>
1 parent 143c637 commit 615dafa

File tree

1 file changed

+35
-23
lines changed

1 file changed

+35
-23
lines changed

tests/integration/source_code/solacc.py

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
dist = (this_file / '../../../../dist').resolve().absolute()
2323

2424

25-
def _clone_all():
25+
def _get_repos_to_clone() -> dict[str, str]:
2626
params = {'per_page': 100, 'page': 1}
27-
to_clone = []
27+
to_clone: dict[str, str] = {}
2828
while True:
2929
result = requests.get(
3030
'https://api.github.com/orgs/databricks-industry-solutions/repos',
@@ -35,18 +35,24 @@ def _clone_all():
3535
break
3636
if 'message' in result:
3737
logger.error(result['message'])
38-
return
38+
return to_clone
3939
params['page'] += 1
4040
for repo in result:
41-
to_clone.append(repo['clone_url'])
41+
name = repo['name']
42+
if name == '.github':
43+
continue
44+
to_clone[name] = repo['clone_url']
45+
return to_clone
46+
47+
48+
def _clone_repo(repo_url, repo_name):
4249
dist.mkdir(exist_ok=True)
43-
to_clone = sorted(to_clone) # [:10]
44-
for url in to_clone:
45-
dst = dist / url.split("/")[-1].split(".")[0]
46-
if dst.exists():
47-
continue
48-
logger.info(f'Cloning {url} into {dst}')
49-
run_command(f'git clone {url} {dst}')
50+
dst = dist / repo_name
51+
if dst.exists():
52+
return dst
53+
logger.info(f'Cloning {repo_url} into {dst}')
54+
run_command(f'git clone {repo_url} {dst}')
55+
return dst
5056

5157

5258
def _collect_missing_imports(advices: list[LocatedAdvice]):
@@ -169,11 +175,20 @@ def _lint_dir(solacc: _SolaccContext, soldir: Path):
169175
path_lookup.clean_tmp_sys_paths()
170176

171177

172-
def _lint_dirs(dir_to_lint: str | None):
173-
solacc = _SolaccContext.create(dir_to_lint is not None)
174-
all_dirs = os.listdir(dist) if dir_to_lint is None else [dir_to_lint]
175-
for soldir in all_dirs:
176-
_lint_dir(solacc, dist / soldir)
178+
def _lint_repos(clone_urls, sol_to_lint: str | None):
179+
solacc = _SolaccContext.create(sol_to_lint is not None)
180+
if sol_to_lint:
181+
# don't clone if linting just one file, assumption is we're troubleshooting
182+
_lint_dir(solacc, dist / sol_to_lint)
183+
else:
184+
names: list[str] = list(clone_urls.keys())
185+
for name in sorted(names, key=str.casefold):
186+
logger.info(f"Cloning {name}...")
187+
sol_dir = _clone_repo(clone_urls[name], name)
188+
logger.info(f"Linting {name}...")
189+
_lint_dir(solacc, sol_dir)
190+
if os.getenv("CI"):
191+
shutil.rmtree(sol_dir)
177192
all_files_len = solacc.total_count - (len(solacc.files_to_skip) if solacc.files_to_skip else 0)
178193
parseable_pct = int(solacc.parseable_count / all_files_len * 100)
179194
missing_imports_count = sum(sum(details.values()) for details in solacc.missing_imports.values())
@@ -192,13 +207,10 @@ def _lint_dirs(dir_to_lint: str | None):
192207
def main(args: list[str]):
193208
install_logger()
194209
logging.root.setLevel(logging.INFO)
195-
dir_to_lint = args[1] if len(args) > 1 else None
196-
if not dir_to_lint:
197-
# don't clone if linting just one file, assumption is we're troubleshooting
198-
logger.info("Cloning...")
199-
_clone_all()
200-
logger.info("Linting...")
201-
_lint_dirs(dir_to_lint)
210+
sol_to_lint = args[1] if len(args) > 1 else None
211+
logger.info("Fetching repos to clone...")
212+
repo_urls = _get_repos_to_clone()
213+
_lint_repos(repo_urls, sol_to_lint)
202214

203215

204216
if __name__ == "__main__":

0 commit comments

Comments
 (0)