|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import json |
| 4 | +from datetime import datetime |
| 5 | +import sys |
| 6 | + |
# Announce what the script does and let the user back out before any file is
# read or written.
print("This tool converts CycloneDX Tool Center v1.0 format (YAML) to v2.0 format (JSON).")
response = input("Press [Enter] to continue or type 'x' to exit: ")
response = response.strip().lower()

# Only an answer of "x" (any case, surrounding whitespace ignored) aborts;
# every other answer, including an empty one, continues.
if response == 'x':
    print("Exiting.")
    sys.exit(0)
| 13 | + |
# ------------------------------------------------------------------------------
# Input and output files (fixed names, relative paths)
# ------------------------------------------------------------------------------
YAML_FILE = "tools.yaml"
JSON_FILE = "tools.json"

# ------------------------------------------------------------------------------
# Output shape: True keeps the placeholder array properties in every tool
# object even when they are empty; False leaves empty ones out.
# ------------------------------------------------------------------------------
INCLUDE_EMPTY_ARRAYS = True

# ------------------------------------------------------------------------------
# v1.0 category name -> (v2.0 schema property, enum value).
# Written grouped by target property; the resulting dict is flat and keyed by
# the lower-case category name.
# ------------------------------------------------------------------------------
CATEGORY_MAPPINGS = {
    category: (schema_property, enum_value)
    for schema_property, entries in (
        ("availability", (
            ("opensource", "OPEN_SOURCE"),
            ("proprietary", "SUBSCRIPTION"),
        )),
        ("functions", (
            ("analysis", "ANALYSIS"),
            ("transform", "TRANSFORM"),
            ("signing-notary", "SIGNING/NOTARY"),
            ("build-integration", "PACKAGE_MANAGER_INTEGRATION"),
            ("distribute", "DISTRIBUTE"),
            ("author", "AUTHOR"),
        )),
        ("packaging", (
            ("library", "LIBRARY"),
            ("github-action", "GITHUB_ACTION"),
            ("github-app", "GITHUB_APP"),
        )),
    )
    for category, enum_value in entries
}
| 42 | + |
| 43 | + |
def naive_yaml_parser(lines):
    """
    Parse the flat Tool Center v1.0 YAML list into a list of dicts.

    This is a small line-oriented reader, not a general YAML parser. It
    recognises only:

    * ``- name:`` as the start of a new tool entry;
    * the scalar keys ``publisher``, ``description``, ``repoUrl`` and
      ``websiteUrl``;
    * block-scalar descriptions (``description: >`` or ``description: |``),
      whose more-indented lines are joined with single spaces (a blank line
      inside the block contributes an empty segment);
    * ``categories:`` followed by ``- item`` lines.

    Every other line is ignored, as is everything before the first
    ``- name:``. Single- or double-quoted scalar values have their surrounding
    quotes removed; backslash escapes inside double quotes are left as-is.

    Args:
        lines: Iterable of text lines; trailing newlines are allowed.

    Returns:
        A list with one dict per tool. Each dict always has a ``"categories"``
        list and a ``"name"``; the other keys are present only when they
        appeared in the input.
    """
    scalar_keys = ("publisher", "description", "repoUrl", "websiteUrl")

    def unquote(value):
        # A quoted YAML scalar still carries its quotes in the raw text; drop
        # them so they do not end up inside the JSON strings.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            inner = value[1:-1]
            # Inside single quotes, YAML writes a literal quote as ''.
            return inner.replace("''", "'") if value[0] == "'" else inner
        return value

    tools = []
    current = None
    in_categories = False
    in_block = False       # inside a "description: >" / "description: |" block
    block_lines = []
    block_indent = 0       # indentation of the "description:" key itself

    for raw_line in lines:
        stripped = raw_line.strip()

        # Block-scalar content is checked first so that text inside the
        # description (even a line that looks like "- name: ...") is never
        # mistaken for structure.
        if in_block:
            if not stripped:
                block_lines.append("")
                continue
            indent = len(raw_line) - len(raw_line.lstrip())
            if indent > block_indent:
                block_lines.append(stripped)
                continue
            # First line that is not indented deeper than the key ends the
            # block; that line is then handled normally below.
            current["description"] = " ".join(block_lines).strip()
            in_block = False
            block_lines = []

        # New tool entry
        if stripped.startswith("- name:"):
            if current is not None:
                tools.append(current)
            current = {
                "categories": [],
                "name": unquote(stripped[len("- name:"):].strip()),
            }
            in_categories = False
            continue

        # Nothing to attach keys to until the first "- name:" has been seen.
        if current is None:
            continue

        if stripped.startswith(("description: >", "description: |")):
            in_block = True
            block_lines = []
            block_indent = len(raw_line) - len(raw_line.lstrip())
            # Leaving the categories list; otherwise a stray "- ..." line
            # after the block would be recorded as a category.
            in_categories = False
            continue

        if stripped.startswith("categories:"):
            in_categories = True
            continue

        for key in scalar_keys:
            prefix = key + ":"
            if stripped.startswith(prefix):
                current[key] = unquote(stripped[len(prefix):].strip())
                in_categories = False
                break
        else:
            if in_categories and stripped.startswith("-"):
                current["categories"].append(unquote(stripped.lstrip("-").strip()))

    # Final flush: the last entry (and a block scalar that runs to end of
    # input) has not been stored yet.
    if current is not None:
        if in_block:
            current["description"] = " ".join(block_lines).strip()
        tools.append(current)

    return tools
| 120 | + |
| 121 | + |
| 122 | + |
def truncate_description(text, max_len=250):
    """Return `text` as-is if it fits in `max_len` characters, else its first `max_len` characters."""
    if len(text) <= max_len:
        return text
    return text[:max_len]
| 126 | + |
| 127 | + |
def map_categories_to_schema(item):
    """
    Split an item's v1.0 'categories' into the three v2.0 enum lists.

    Each category is lower-cased and stripped, then looked up in
    CATEGORY_MAPPINGS. Unknown categories are dropped, and a value is added to
    its list only once, in first-seen order.

    Returns a tuple ``(availability, functions, packaging)`` of lists.
    """
    buckets = {"availability": [], "functions": [], "packaging": []}

    for raw_category in item.get("categories", []):
        mapping = CATEGORY_MAPPINGS.get(raw_category.lower().strip())
        if mapping is None:
            continue
        schema_property, enum_value = mapping
        target = buckets.get(schema_property)
        if target is not None and enum_value not in target:
            target.append(enum_value)

    return buckets["availability"], buckets["functions"], buckets["packaging"]
| 145 | + |
| 146 | + |
def build_tool_schema(item):
    """
    Turn one parsed v1.0 entry into a tool object for the v2.0 JSON output.

    String fields are stripped, with missing ones becoming "". The description
    is passed through truncate_description, and the categories are converted
    with map_categories_to_schema. The URL properties are emitted only when
    non-empty. The remaining array properties are always empty here; whether
    they appear in the result is controlled by INCLUDE_EMPTY_ARRAYS.
    """
    def text_field(key):
        return item.get(key, "").strip()

    # Pull out and normalise the plain string fields
    name = text_field("name")
    publisher = text_field("publisher")
    description = text_field("description")
    repo_url = text_field("repoUrl")
    website_url = text_field("websiteUrl")

    description = truncate_description(description)
    availability, functions, packaging = map_categories_to_schema(item)

    # Core properties, in output order
    tool_obj = {
        "name": name,
        "publisher": publisher,
        "description": description,
        "availability": availability,
        "functions": functions,
        "packaging": packaging,
    }

    # Optional URLs: left out entirely when blank
    for out_key, url in (("repository_url", repo_url), ("website_url", website_url)):
        if url:
            tool_obj[out_key] = url

    # Array properties that the v1.0 data cannot populate. Each gets its own
    # fresh list and is added only if empty arrays are wanted.
    placeholder_keys = (
        "capabilities",
        "analysis",
        "transform",
        "library",
        "platform",
        "lifecycle",
        "supportedStandards",
        "cycloneDxVersion",
        "supportedLanguages",
    )
    for key in placeholder_keys:
        values = []
        if INCLUDE_EMPTY_ARRAYS or values:
            tool_obj[key] = values

    return tool_obj
| 202 | + |
| 203 | + |
def main():
    """
    Convert YAML_FILE to the v2.0 JSON document and write it to JSON_FILE.

    Reads the input as UTF-8, parses it with naive_yaml_parser, converts every
    entry with build_tool_schema, stamps the result with the current UTC time
    and overwrites JSON_FILE. Prints a two-line summary when done.

    Raises:
        OSError: if YAML_FILE cannot be opened for reading or JSON_FILE cannot
            be opened for writing.
    """
    # Imported here because the file's top-level import only brings in
    # ``datetime`` from the datetime module.
    from datetime import timezone

    # 1) Read from tools.yaml
    with open(YAML_FILE, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # 2) Parse the naive YAML and 3) build schema objects
    tool_objs = [build_tool_schema(entry) for entry in naive_yaml_parser(lines)]

    # 4) Current UTC time, e.g. "2023-05-07T13:45:30Z".
    #    datetime.utcnow() is deprecated since Python 3.12; an aware "now" in
    #    UTC formats to the same string.
    now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # 5) Final JSON structure
    result = {
        "$schema": "https://cyclonedx.org/schema/tool-center-v2.schema.json",
        "specVersion": "2.0",
        "last_updated": now_utc,
        "tools": tool_objs,
    }

    # 6) Write to tools.json (overwrite if exists)
    with open(JSON_FILE, "w", encoding="utf-8") as out_f:
        json.dump(result, out_f, indent=2, ensure_ascii=False)

    print(f"Converted {len(tool_objs)} tools from {YAML_FILE} → {JSON_FILE}")
    print(f"Set last_updated to: {now_utc}")


if __name__ == "__main__":
    main()
0 commit comments