|
| 1 | +import argparse |
| 2 | +import glob |
| 3 | +import json |
| 4 | +import os |
| 5 | +from collections import defaultdict |
| 6 | + |
| 7 | +import yaml |
| 8 | + |
| 9 | +from ephemeris.generate_tool_list_from_ga_workflow_files import ( |
| 10 | + generate_repo_list_from_workflow, |
| 11 | +) |
| 12 | +from steal_sections import steal_section |
| 13 | +from fix_lockfile import update_file as fix_lockfile |
| 14 | +from update_tool import update_file |
| 15 | + |
# Galaxy server to query for already-installed repos (passed to steal_section as galaxy_url).
GALAXY_URL = "https://usegalaxy.eu"
| 17 | + |
| 18 | + |
def find_workflows(workflow_path):
    """Recursively collect the paths of all Galaxy workflow (.ga) files under *workflow_path*."""
    found = []
    for root, _dirs, names in os.walk(workflow_path):
        matches = [os.path.join(root, name) for name in names if name.endswith(".ga")]
        found.extend(matches)
    return found
| 30 | + |
| 31 | + |
def _load_lock_files(lock_files):
    """Load every lock file; return (per-file contents, (owner, name) -> first-seen repo dict).

    The lookup keeps the *first* occurrence of each (owner, name) across all
    files, matching the order the lock files are iterated.
    """
    contents = {}
    lookup = {}
    for lock_file in lock_files:
        with open(lock_file) as fh:
            data = yaml.safe_load(fh)
        contents[lock_file] = data
        for repo in data["tools"]:
            lookup.setdefault((repo["owner"], repo["name"]), repo)
    return contents, lookup


def _merge_workflow_revisions(repo_list, lookup):
    """Merge the revisions required by the workflows into the matching lock entries in place."""
    for wf_repo in repo_list:
        locked = lookup.get((wf_repo["owner"], wf_repo["name"]))
        if locked is not None:
            # .get with a default on BOTH sides: a repo entry may lack a
            # "revisions" key entirely (the original indexed wf_repo["revisions"]
            # directly, which raised KeyError in that case).
            merged = set(locked.get("revisions", [])) | set(wf_repo.get("revisions", []))
            locked["revisions"] = sorted(merged)


def _deduplicate_tools(tools):
    """Collapse duplicate (owner, name) entries within one lock file.

    Revisions of later duplicates are merged (sorted, unique) into the first
    occurrence; the original order of first occurrences is preserved.
    """
    first_seen = {}
    deduplicated = []
    for tool in tools:
        key = (tool["owner"], tool["name"])
        if key in first_seen:
            keeper = first_seen[key]
            keeper["revisions"] = sorted(
                set(keeper.get("revisions", [])) | set(tool.get("revisions", []))
            )
        else:
            first_seen[key] = tool
            deduplicated.append(tool)
    return deduplicated


def add_repos(workflow_path, toolset, uncategorized_file):
    """Install the tool repos referenced by the .ga workflows under *workflow_path*.

    Steps:
      1. Extract the repo list from all workflow files.
      2. Distribute the repos into the section files of *toolset* via
         steal_section; repos that fit no section land in *uncategorized_file*.
      3. Normalize every section file (fix_lockfile / update_file).
      4. Merge the workflow-required revisions into the lock files and
         deduplicate each lock file's tool entries, then write them back.
    """
    workflow_paths = find_workflows(workflow_path)
    repo_list = generate_repo_list_from_workflow(workflow_paths, "Uncategorized")
    steal_section(
        {"tools": repo_list},
        toolset,
        leftovers_file=os.path.join(toolset, uncategorized_file),
        galaxy_url=GALAXY_URL,
        verbose=True,
    )
    for section_file in glob.glob(f"{toolset}/*.yml"):
        fix_lockfile(
            section_file,
            install_repository_dependencies=False,
            install_resolver_dependencies=False,
        )
        update_file(section_file, without=True)

    lock_file_contents, global_tool_lookup = _load_lock_files(
        glob.glob(f"{toolset}/*.yml.lock")
    )
    # The lookup holds the repo dicts themselves, so revisions can be merged
    # directly instead of rescanning the owning lock file for each repo.
    _merge_workflow_revisions(repo_list, global_tool_lookup)

    for lock_file, entries in lock_file_contents.items():
        entries["tools"] = _deduplicate_tools(entries["tools"])
        with open(lock_file, "w") as fh:
            # JSON round-trip coerces any YAML-specific node types into plain
            # dicts/lists/scalars before dumping.
            yaml.safe_dump(json.loads(json.dumps(entries)), stream=fh)
| 111 | + |
| 112 | + |
if __name__ == "__main__":
    # Command-line entry point: wire up the arguments and hand off to add_repos.
    arg_parser = argparse.ArgumentParser(description="")
    arg_parser.add_argument(
        "-w",
        "--workflow-path",
        help="Path to directory with workflows",
    )
    arg_parser.add_argument(
        "-s",
        "--toolset",
        default="usegalaxy.org",
        help="The toolset dir to add versions to",
    )
    arg_parser.add_argument(
        "-u",
        "--uncategorized-file",
        default="leftovers.yaml",
        help="The file to store leftover (uninstalled) repos in.",
    )
    parsed = arg_parser.parse_args()
    add_repos(
        workflow_path=parsed.workflow_path,
        toolset=parsed.toolset,
        uncategorized_file=parsed.uncategorized_file,
    )