|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import argparse |
| 6 | +import json |
| 7 | +import shutil |
| 8 | +import subprocess |
| 9 | +import tempfile |
| 10 | +import urllib.parse |
| 11 | +from pathlib import Path |
| 12 | +from typing import Dict, List |
| 13 | + |
# Directory containing this script; index.html.template is looked up here.
_HERE = Path(__file__).parent

# Languages whose tree-sitter grammars are built to WASM and offered as
# <option> entries in the generated index.html.
_LANGUAGES = ["swift", "kotlin", "java", "go", "python"]
| 17 | + |
| 18 | + |
| 19 | +def run_command(cmd: List[str], cwd: Path = None) -> subprocess.CompletedProcess[str]: |
| 20 | + """Run a shell command and return the result.""" |
| 21 | + try: |
| 22 | + result = subprocess.run( |
| 23 | + cmd, cwd=cwd, capture_output=True, text=True, check=True |
| 24 | + ) |
| 25 | + return result |
| 26 | + except subprocess.CalledProcessError as e: |
| 27 | + print(f"Command failed: {' '.join(cmd)}") |
| 28 | + print(f"Error: {e.stderr}") |
| 29 | + raise |
| 30 | + |
| 31 | + |
def extract_tree_sitter_deps(repo_root: Path) -> list[tuple[str, str, Dict]]:
    """Collect the supported tree-sitter grammar packages from cargo metadata.

    Runs ``cargo metadata --format-version 1`` in ``repo_root`` and keeps
    every package named ``tree-sitter-<lang>`` whose ``<lang>`` is listed in
    ``_LANGUAGES``.

    Args:
        repo_root: Directory in which ``cargo metadata`` is executed.

    Returns:
        A list of ``(lang_name, package_name, package)`` tuples, where
        ``package`` is the package's raw JSON object from the metadata output.
    """
    prefix = "tree-sitter-"
    result = run_command(
        ["cargo", "metadata", "--format-version", "1"], cwd=repo_root
    )
    metadata = json.loads(result.stdout)

    deps = []
    for package in metadata["packages"]:
        pkg_name = package["name"]
        if not pkg_name.startswith(prefix):
            continue

        # Strip only the leading prefix; str.replace would also remove any
        # later occurrence of the prefix inside the name.
        lang_name = pkg_name.removeprefix(prefix)
        if lang_name in _LANGUAGES:
            deps.append((lang_name, pkg_name, package))

    return deps
| 49 | + |
| 50 | + |
def parse_git_source(dep_info: Dict) -> tuple[str, str]:
    """Split a Cargo git source string into repository URL and revision.

    An example git source string:
    "git+https://github.com/danieltrt/tree-sitter-go.git?rev=ea5ceb716012db8813a2c05fab23c3a020988724#ea5ceb716012db8813a2c05fab23c3a020988724"
    i.e. a ``git+`` prefix, the repository URL, an optional query naming the
    requested ``rev``/``branch``/``tag`` and an optional ``#`` fragment.

    Args:
        dep_info: Package entry whose ``"source"`` value is the git source
            string.

    Returns:
        ``(git_url, rev)``. ``rev`` is the first non-empty value among the
        ``rev``, ``branch`` and ``tag`` query parameters; when the query names
        none of them, the ``#`` fragment is used instead.

    Raises:
        ValueError: If neither the query nor the fragment yields a revision.
    """
    source: str = dep_info["source"]

    # Separate "<url>?<query>" from the "#<fragment>" suffix (if any).
    locator, _, fragment = source.removeprefix("git+").partition("#")
    locator = locator.strip()
    git_url, _, query_string = locator.partition("?")

    params = urllib.parse.parse_qs(query_string)
    requested = (params.get(key, [None])[0] for key in ("rev", "branch", "tag"))
    rev = next((value for value in requested if value), None)

    if not rev:
        # No explicit ref in the query (e.g. "git+<url>#<sha>"). In the example
        # above the fragment is a commit hash, so fall back to it rather than
        # failing. NOTE(review): presumably the commit Cargo locked - confirm.
        rev = fragment.strip()
    if not rev:
        raise ValueError(
            f"Missing rev/branch/tag information in git source: {locator}"
        )

    return git_url, rev
| 72 | + |
| 73 | + |
def clone_grammar(name: str, dep_info: Dict, temp_dir: Path) -> Path:
    """Clone a grammar repository into ``temp_dir`` and check out its revision.

    Args:
        name: Package name such as ``tree-sitter-go``; also used as the name
            of the clone directory.
        dep_info: Package entry. ``"source"`` selects the origin and, for
            registry packages, ``"version"`` determines the tag to check out.
        temp_dir: Parent directory that receives the clone.

    Returns:
        Path of the cloned working tree (``temp_dir / name``).

    Raises:
        ValueError: If the source is missing or is neither a ``git+`` nor a
            ``registry+`` source.
        subprocess.CalledProcessError: If ``git clone`` or ``git checkout``
            fails.
    """
    source = dep_info.get("source")
    # A missing or null source must fail with the same clear error as any
    # other unsupported source, not with an AttributeError on None.
    if not isinstance(source, str):
        raise ValueError(f"Unsupported source type for {name}: {source}")

    # If it is a git source, parse it. Otherwise, it is a registry source, and
    # we assume it comes from the official tree-sitter GitHub organization,
    # tagged as "v<version>".
    if source.startswith("git+"):
        git_url, version = parse_git_source(dep_info)
    elif source.startswith("registry+"):
        repo_name = name.removeprefix("tree-sitter-")
        git_url = f"https://github.com/tree-sitter/tree-sitter-{repo_name}"
        version = "v" + dep_info["version"]
    else:
        raise ValueError(f"Unsupported source type for {name}: {source}")

    clone_dir = temp_dir / name

    print(f"Cloning {name} from {git_url} and checking out {version}")
    run_command(["git", "clone", git_url, str(clone_dir)])
    run_command(["git", "checkout", version], cwd=clone_dir)

    return clone_dir
| 95 | + |
| 96 | + |
def build_wasm(grammar_dir: Path, name: str) -> Path:
    """Build the WASM file for a grammar using the tree-sitter CLI.

    Args:
        grammar_dir: Checked-out grammar repository to build in.
        name: Package name; the build output is expected at
            ``grammar_dir / f"{name}.wasm"``.

    Returns:
        Path of the generated ``.wasm`` file.

    Raises:
        RuntimeError: If the tree-sitter CLI is missing, fails to report its
            version, reports it in an unexpected format, or is not 0.24.x.
        subprocess.CalledProcessError: If ``tree-sitter build --wasm`` fails.
        FileNotFoundError: If the build succeeds but the expected ``.wasm``
            file is not present.
    """
    print(f"Building WASM for {name}")

    # Note that we have to use tree-sitter CLI 0.24 since the main tree-sitter and grammars
    # we use in Piranha are old and not compatible with the latest tree-sitter CLI.
    # TODO: remove this restriction once we upstream all our changes to tree-sitter grammars
    # and upgrade to latest tree-sitter in Piranha.
    try:
        proc = run_command(["tree-sitter", "--version"])
        # The version is taken from the second whitespace-separated field of
        # the output; IndexError (caught below) covers any other shape.
        version = proc.stdout.strip().split()[1]
        # Compare on the component boundary so that e.g. "0.240.0" is rejected.
        if not (version == "0.24" or version.startswith("0.24.")):
            raise RuntimeError(f"tree-sitter CLI version {version} not supported")
    except (
        subprocess.CalledProcessError,
        FileNotFoundError,
        IndexError,
        RuntimeError,
    ) as err:
        raise RuntimeError(
            "tree-sitter CLI version 0.24.x is required. Install with: cargo install tree-sitter-cli --version 0.24.4"
        ) from err

    print(f"Using tree-sitter CLI version: {proc.stdout.strip()}")

    run_command(["tree-sitter", "build", "--wasm"], cwd=grammar_dir)

    wasm_file = grammar_dir / f"{name}.wasm"
    if not wasm_file.exists():
        raise FileNotFoundError(f"WASM file not found for {name}")

    return wasm_file
| 124 | + |
| 125 | + |
def copy_wasm_to_assets(wasm_file: Path, lang_name: str, assets_dir: Path) -> Path:
    """Copy a built WASM file into the assets directory.

    Args:
        wasm_file: WASM file to copy.
        lang_name: Language name; the copy is named
            ``tree-sitter-<lang_name>.wasm``.
        assets_dir: Destination directory. It is created, together with any
            missing parent directories, if it does not exist yet.

    Returns:
        Path of the copied file inside ``assets_dir``.
    """
    # parents=True so the function also works when the directory above the
    # assets directory has not been created by the caller.
    assets_dir.mkdir(parents=True, exist_ok=True)

    dest_file = assets_dir / f"tree-sitter-{lang_name}.wasm"

    print(f"Copying {wasm_file} to {dest_file}")
    shutil.copy2(wasm_file, dest_file)

    return dest_file
| 136 | + |
| 137 | + |
def instantiate_index_html(template_path: Path, output_path: Path) -> None:
    """Render index.html from its template.

    Replaces the ``{{ LANGUAGE_OPTIONS }}`` placeholder with one ``<option>``
    element per entry of ``_LANGUAGES`` (value: the language name, label: its
    title-cased form), joined by newlines.

    Args:
        template_path: Template file to read.
        output_path: File to write the rendered HTML to (overwritten).
    """
    options = "\n".join(
        f'<option value="{lang}">{lang.title()}</option>' for lang in _LANGUAGES
    )
    # Use an explicit encoding so the result does not depend on the platform's
    # default locale encoding.
    content = template_path.read_text(encoding="utf-8")
    output_path.write_text(
        content.replace("{{ LANGUAGE_OPTIONS }}", options), encoding="utf-8"
    )
| 146 | + |
| 147 | + |
def main() -> None:
    """Build the tree-sitter playground: index.html plus one WASM per grammar.

    Parses ``--dist-dir``/``-d`` (default: ``dist`` under the current working
    directory), deletes an existing dist directory, instantiates
    ``index.html`` from the template next to this script, then clones, builds
    and copies the WASM grammar of every supported tree-sitter dependency
    into ``<dist-dir>/assets``.

    Raises:
        RuntimeError: If no supported tree-sitter dependency is found.
    """
    parser = argparse.ArgumentParser(
        description="Build tree-sitter playground with WASM files"
    )
    parser.add_argument(
        "--dist-dir",
        "-d",
        type=Path,
        help="Directory to copy playground files and build WASM files to",
        default=Path.cwd() / "dist",
    )

    args = parser.parse_args()
    # Already a Path: argparse applies type=Path and the default is a Path.
    dist_dir: Path = args.dist_dir

    if dist_dir.exists():
        print(f"Dist directory {dist_dir} already exists, clearing it...")
        shutil.rmtree(dist_dir)

    proc = run_command(["git", "rev-parse", "--show-toplevel"])
    repo_root = Path(proc.stdout.strip())
    print(f"Using repo root: {repo_root}")
    print()

    print("Instantiating index.html.template to dist directory...")
    dist_dir.mkdir(parents=True, exist_ok=True)
    instantiate_index_html(_HERE / "index.html.template", dist_dir / "index.html")

    print("Building WASM files for all supported tree-sitter dependencies...")

    print("Extracting tree-sitter dependencies to build WASM grammars...")
    deps = extract_tree_sitter_deps(repo_root)

    if not deps:
        raise RuntimeError("No supported tree-sitter dependencies found")

    print(f"Found {len(deps)} supported tree-sitter dependencies:")
    for lang_name, pkg_name, _ in deps:
        print(f"  - {pkg_name} ({lang_name})")

    # Track what was actually built so the summary does not claim languages
    # that were absent from the dependency list.
    built = []

    with tempfile.TemporaryDirectory() as temp_dir_str:
        temp_dir = Path(temp_dir_str)

        for lang_name, pkg_name, dep_info in deps:
            print(f"\n--- Processing {pkg_name} ---")

            grammar_dir = clone_grammar(pkg_name, dep_info, temp_dir)
            wasm_file = build_wasm(grammar_dir, pkg_name)
            copy_wasm_to_assets(wasm_file, lang_name, dist_dir / "assets")
            built.append(lang_name)

            print(f"✓ Successfully built {pkg_name}")

    print("\n=== Build Complete ===")
    print(f"Successfully built {len(built)} grammars: {built}")
    print(f"Output directory: {dist_dir}")
| 206 | + |
| 207 | + |
# Run the build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments