diff --git a/pyproject.toml b/pyproject.toml index cdac5db058a..377c215c80f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,4 +22,9 @@ dev = [ "pre-commit==4.4.0", ] +[tool.uv.workspace] +members = [ + "tools/opm", +] + # This file does not describe packaging details for compiler and runtime diff --git a/tools/opm/README.md b/tools/opm/README.md new file mode 100644 index 00000000000..bd22e18304a --- /dev/null +++ b/tools/opm/README.md @@ -0,0 +1,58 @@ +# OPM (ONE or circle Package Manager) + +`opm` is a set of utility scripts to simplify the process of creating GGMA packages for the ONE runtime. It handles environment setup, model downloading, and the export pipeline. + +## Usage + +The tools are designed to be run via the `opm` wrapper script (or directly via Python if preferred). + +### 1. Init + +Initialize the environment. This creates a virtual environment, installs dependencies (including a CPU-only version of torch to avoid large downloads), and fetches the necessary `o2o` tools from the ONE repository. + +```bash +$ opm init +``` + +### 2. Import + +Download a model. + +```bash +$ opm import <model_id> [-r <requirements_file>] +``` + +- `<model_id>`: HuggingFace model ID (e.g., `Maykeye/TinyLLama-v0`) or direct URL. +- `-r, --requirements`: (Optional) Path to a requirements file to install specific dependencies for the model. + +Example: +```bash +$ opm import Maykeye/TinyLLama-v0 -r tinyllama/tinyllama.requirements +``` + +### 3. Export + +Export the downloaded model to a GGMA package (`.circle` file + tokenizer). This runs the specified export script and pipeline configuration. + +```bash +$ opm export -s <script> -p <pipeline> +``` + +- `-s, --script`: Path to the python export script (e.g., `tinyllama/tinyllama.py`). +- `-p, --pipeline`: Path to the pipeline configuration YAML file (e.g., `tinyllama/tinyllama.pipeline`). + +Example: +```bash +$ opm export -s tinyllama/tinyllama.py -p tinyllama/tinyllama.pipeline +``` + +### 4. Clean + +Clean up build artifacts. 
def remove_dir(path):
    """Delete the directory tree at *path*, reporting it only when it existed.

    A missing directory is not an error: the call is then a silent no-op.
    Any other failure raised by ``shutil.rmtree`` propagates to the caller.
    """
    try:
        shutil.rmtree(path)
    except FileNotFoundError:
        # Nothing to clean up, so stay quiet.
        return
    print(f"Removing {path} directory...")
def run_pipeline_step(step_name):
    """Run the shell command configured for *step_name*, if there is one.

    The command is looked up in the enclosing ``pipeline_config`` and executed
    through ``run_cmd`` with the enclosing ``env``. A step that is absent from
    the configuration, or whose value is empty, is skipped silently.

    Args:
        step_name: Key of the pipeline step (e.g. "decode" or "merge").
    """
    # yaml.safe_load() yields None for an empty file, and a key with no value
    # also loads as None; treat both like a missing step instead of crashing.
    cmd = (pipeline_config or {}).get(step_name)
    if not cmd:
        return

    print(f"Running {step_name} pipeline...")
    # A YAML block scalar (|) keeps its newlines. The step is meant to be one
    # command line (typically a pipe chain split across lines), so join the
    # lines with spaces before handing it to the shell.
    run_cmd(cmd.replace("\n", " "), env=env)
def main():
    """Download a HuggingFace model into the build directory and install its requirements.

    Command line:
        model_id           HuggingFace model ID. Its last path component,
                           lower-cased, names the directory under BUILD_DIR.
        -r/--requirements  Requirements file to install after the download.
                           When omitted, ``requirements.txt`` in the current
                           directory is installed if it exists.

    Exits with status 1 when an explicitly requested requirements file does
    not exist. A failing ``pip install`` raises ``subprocess.CalledProcessError``.
    """
    parser = argparse.ArgumentParser(description="Import model from HuggingFace")
    parser.add_argument("model_id", help="HuggingFace model ID")
    parser.add_argument(
        "-r",
        "--requirements",
        help="Path to requirements file to install (default: requirements.txt)")
    args = parser.parse_args()

    # Fail fast: reject a bad -r argument before starting a large download.
    explicit_requirements = bool(args.requirements)
    requirements_path = os.path.abspath(
        args.requirements if explicit_requirements else "requirements.txt")
    if explicit_requirements and not os.path.exists(requirements_path):
        print(f"Error: {requirements_path} not found.")
        sys.exit(1)

    model_id = args.model_id
    model_basename = model_id.split("/")[-1].lower()

    # Create build directory and download into it
    os.makedirs(BUILD_DIR, exist_ok=True)
    target_dir = os.path.join(BUILD_DIR, model_basename)
    print(f"Downloading model files for {model_id} into {target_dir}...")

    # Temporarily patch httpx.Client so clients created during the download
    # skip certificate verification.
    # No proxy argument is injected: httpx >= 0.28 rejects the old `proxies`
    # keyword, and with its default trust_env=True it already honours the
    # proxy settings from the environment.
    original_client = httpx.Client

    def patched_client(*client_args, **client_kwargs):
        # NOTE(review): verify=False disables TLS certificate checks for every
        # request (kept for self-signed certificates). This exposes the
        # download to man-in-the-middle attacks; consider making it opt-in.
        client_kwargs.setdefault('verify', False)
        return original_client(*client_args, **client_kwargs)

    httpx.Client = patched_client
    try:
        snapshot_download(repo_id=model_id, local_dir=target_dir)
    finally:
        # Restore original httpx.Client even if the download fails
        httpx.Client = original_client

    # Install requirements
    if not os.path.exists(requirements_path):
        # Only the implicit default can be missing here; -r was checked above.
        print(f"No requirements file at {requirements_path}; "
              "skipping dependency installation.")
        return

    print(f"Installing requirements from {requirements_path}...")
    # Use the interpreter running this script so packages land in its
    # environment; an argument list avoids shell quoting issues in the path.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", requirements_path],
        check=True)
def main():
    """Prepare the opm environment: virtual environment, packages, and o2o tools.

    Steps, in order:
      1. create the virtual environment at VENV_DIR unless it already exists;
      2. install CPU-only torch and the pinned tico release with the venv's pip;
      3. make sure BUILD_DIR exists (it hosts the temporary git worktree);
      4. unless the worktree already exists, fetch the PR ref, add a worktree
         for it and sparse-check-out ``tools/<O2O_DIR>``;
      5. move the o2o tools into the current directory if not already there;
      6. remove the temporary worktree;
      7. mark the o2o Python scripts executable.
    """
    # 1. Virtual environment
    if os.path.exists(VENV_DIR):
        print(f"Virtual environment already exists at {VENV_DIR}.")
    else:
        print(f"Creating virtual environment at {VENV_DIR}...")
        venv.create(VENV_DIR, with_pip=True)

    # 2. opm requirements
    pip_cmd = os.path.join(VENV_DIR, "bin", "pip")
    pip_installs = (
        # Installing CPU-only torch first keeps TICO from pulling the large
        # CUDA-enabled PyTorch package.
        "torch --index-url https://download.pytorch.org/whl/cpu",
        "tico==0.1.0.dev251125",
    )
    for spec in pip_installs:
        run_cmd(f"{pip_cmd} install {spec}")

    # 3. Build directory that hosts the temporary worktree
    os.makedirs(BUILD_DIR, exist_ok=True)
    worktree_path = os.path.join(BUILD_DIR, PR_WORKTREE)

    # 4. Git worktree for the PR, limited to the o2o tools
    if not os.path.exists(worktree_path):
        # The PR is fetched only when the worktree still has to be created.
        try:
            run_cmd(f"git fetch https://github.com/Samsung/ONE.git {PR_REF}:{PR_BRANCH}")
        except subprocess.CalledProcessError:
            print("Fetch failed, possibly branch already exists. Continuing...")

        # No checkout yet: sparse checkout is configured first.
        run_cmd(f"git worktree add --no-checkout -f {worktree_path} {PR_BRANCH}")

        sparse_checkout_cmds = (
            "git sparse-checkout init --cone",
            f"git sparse-checkout set tools/{O2O_DIR}",
            # Populate files
            f"git checkout {PR_BRANCH}",
        )
        previous_dir = os.getcwd()
        try:
            os.chdir(worktree_path)
            for git_cmd in sparse_checkout_cmds:
                run_cmd(git_cmd)
        finally:
            # Always return to the starting directory, even on failure.
            os.chdir(previous_dir)

    # 5. Move o2o to the current directory
    if not os.path.exists(O2O_DIR):
        src_o2o = os.path.join(worktree_path, "tools", O2O_DIR)
        if not os.path.exists(src_o2o):
            print("o2o tools not found in worktree.")
        else:
            print(f"Moving o2o tools to {O2O_DIR}...")
            shutil.move(src_o2o, O2O_DIR)

    # 6. Remove the temporary worktree
    if os.path.exists(worktree_path):
        print("Removing temporary worktree...")
        run_cmd(f"git worktree remove --force {worktree_path}")

    # 7. Make tools executable
    if os.path.exists(O2O_DIR):
        run_cmd(f"chmod +x {O2O_DIR}/*.py")

    print("opm init completed.")
def run_cmd(cmd, cwd=None, env=None, check=True):
    """Echo a shell command, run it, and return its result.

    Args:
        cmd: Command line handed to the shell (``shell=True``).
        cwd: Working directory for the command; ``None`` keeps the current one.
        env: Environment mapping for the command; ``None`` inherits the
            current environment.
        check: When true, a non-zero exit status raises an exception.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    # Flush so the echo appears before the child's own output even when
    # stdout is block-buffered (e.g. redirected to a file or a CI log).
    print(f"Running: {cmd}", flush=True)
    return subprocess.run(cmd, shell=True, cwd=cwd, env=env, check=check)