Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,9 @@ dev = [
"pre-commit==4.4.0",
]

[tool.uv.workspace]
members = [
"tools/opm",
]

# This file does not describe packaging details for compiler and runtime
58 changes: 58 additions & 0 deletions tools/opm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# OPM (ONE or circle Package Manager)

`opm` is a set of utility scripts to simplify the process of creating GGMA packages for ONE runtime. It handles environment setup, model downloading, and the export pipeline.

## Usage

The tools are designed to be run via the `opm` wrapper script (or directly via python if preferred).

### 1. Init

Initialize the environment. This creates a virtual environment, installs dependencies (including a CPU-only version of torch to avoid large downloads), and fetches the necessary `o2o` tools from the ONE repository.

```bash
$ opm init
```

### 2. Import

Download a model.

```bash
$ opm import <model_id|url> [-r <requirements_file>]
```

- `<model_id|url>`: HuggingFace model ID (e.g., `Maykeye/TinyLLama-v0`) or direct URL.
- `-r, --requirements`: (Optional) Path to a requirements file to install specific dependencies for the model.

Example:
```bash
$ opm import Maykeye/TinyLLama-v0 -r tinyllama/tinyllama.requirements
```

### 3. Export

Export the downloaded model to a GGMA package (`.circle` file + tokenizer). This runs the specified export script and pipeline configuration.

```bash
$ opm export -s <export_script> -p <pipeline_config>
```

- `-s, --script`: Path to the python export script (e.g., `tinyllama/tinyllama.py`).
- `-p, --pipeline`: Path to the pipeline configuration YAML file (e.g., `tinyllama/tinyllama.pipeline`).

Example:
```bash
$ opm export -s tinyllama/tinyllama.py -p tinyllama/tinyllama.pipeline
```

### 4. Clean

Clean up build artifacts.

```bash
$ opm clean [--all]
```

- Default: Removes the `build/` directory.
- `--all`: Removes `build/`, `venv/`, and `o2o/` (full reset).
37 changes: 37 additions & 0 deletions tools/opm/clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
import shutil
import os

import argparse
from config import VENV_DIR, O2O_DIR, BUILD_DIR


def remove_dir(path):
    """Recursively delete *path*; report only when something was removed.

    A missing directory is silently ignored. Any other error (e.g. *path*
    is a regular file) propagates to the caller.
    """
    try:
        shutil.rmtree(path)
    except FileNotFoundError:
        return
    print(f"Removing {path} directory...")


def main():
    """CLI entry point: drop build artifacts, optionally the full environment."""
    parser = argparse.ArgumentParser(description="Clean build artifacts")
    parser.add_argument("--all",
                        action="store_true",
                        help="Remove all generated files including venv and o2o")
    options = parser.parse_args()

    # The build directory is discarded in every mode.
    remove_dir(BUILD_DIR)

    if not options.all:
        print("Clean complete.")
        return

    # Full reset: also drop the fetched tools and the virtual environment.
    remove_dir(O2O_DIR)
    remove_dir(VENV_DIR)
    print("Full clean complete.")


if __name__ == "__main__":
main()
4 changes: 4 additions & 0 deletions tools/opm/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# OPM Constants
# Directory names used by the opm scripts, all relative to the opm working
# directory (see README for the layout each one holds).
VENV_DIR = "venv"  # Python virtual environment created by `opm init`
O2O_DIR = "o2o"  # o2o tools extracted from the ONE repository by `opm init`
BUILD_DIR = "build"  # scratch area for downloaded models and export output
121 changes: 121 additions & 0 deletions tools/opm/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#!/usr/bin/env python3
import argparse
import shutil
import json
import yaml
import os
import sys
from config import O2O_DIR, BUILD_DIR
from util import run_cmd


def main():
    """Export an imported model to a GGMA package (.circle + tokenizer files).

    Steps:
      1. Run the export script in `prefill` and `decode` modes inside BUILD_DIR.
      2. Run the `decode` and `merge` steps defined in the pipeline YAML.
      3. Copy tokenizer/config files and move model.circle into an `out/`
         package directory.

    Prints an error and returns early when BUILD_DIR or the pipeline file is
    missing; raises ValueError when no imported model directory is found.
    """
    parser = argparse.ArgumentParser(description="Export model to GGMA package")
    parser.add_argument("-s",
                        "--script",
                        required=True,
                        help="Export script to use (e.g., tinyllama.py)")
    parser.add_argument("-p",
                        "--pipeline",
                        default="pipeline.yaml",
                        help="Pipeline configuration file (default: pipeline.yaml)")
    args = parser.parse_args()

    export_script_name = args.script
    pipeline_file = args.pipeline

    # BUG FIX: resolve the pipeline path *before* chdir'ing into BUILD_DIR.
    # The user passes it relative to the invocation directory (see README
    # examples), so resolving it after the chdir looked inside build/ instead.
    pipeline_path = os.path.abspath(pipeline_file)

    # Work inside the build directory created by `opm import`.
    if not os.path.exists(BUILD_DIR):
        print(f"Error: {BUILD_DIR} directory does not exist. Run 'opm import' first.")
        return

    os.chdir(BUILD_DIR)

    if not os.path.exists(pipeline_path):
        print(f"Error: Pipeline file {pipeline_path} not found.")
        return

    with open(pipeline_path, "r") as f:
        pipeline_config = yaml.safe_load(f)

    # Sanity check: an imported model directory (one containing config.json)
    # must exist before exporting.
    model_dir = None
    for d in os.listdir("."):
        config_path = os.path.join(d, "config.json")
        if os.path.isdir(d) and os.path.exists(config_path):
            model_dir = d
            print(f"Using local model directory: {model_dir}")
            break

    if not model_dir:
        raise ValueError("No local model directory found (directory with config.json)")

    # Expose the o2o tools and the current directory (where pipeline scripts
    # reside) on PATH. The environment is passed explicitly to every run_cmd
    # call below, so the parent process environment is left untouched
    # (the previous os.environ.update(env) was redundant).
    env = os.environ.copy()
    cwd_path = os.path.abspath(os.getcwd())
    o2o_path = os.path.abspath(os.path.join(cwd_path, "..", O2O_DIR))
    env["PATH"] = f"{o2o_path}:{cwd_path}:{env['PATH']}"

    # Use the interpreter running this script (the venv python after init).
    python_bin = sys.executable
    export_script = os.path.join("..", export_script_name)

    # 1. Generate prefill and decode circles
    print(f"Running {export_script_name} (prefill)...")
    run_cmd(f"{python_bin} {export_script} --mode prefill", env=env)

    print(f"Running {export_script_name} (decode)...")
    run_cmd(f"{python_bin} {export_script} --mode decode", env=env)

    def run_pipeline_step(step_name):
        """Run one named step from the pipeline config, if present."""
        if step_name in pipeline_config:
            print(f"Running {step_name} pipeline...")
            # YAML block scalars preserve newlines; join them so a command
            # written across several lines runs as a single shell line.
            cmd = pipeline_config[step_name].replace("\n", " ")
            run_cmd(cmd, env=env)

    # 2. Pipeline (decode)
    run_pipeline_step("decode")

    # 3. Merge
    run_pipeline_step("merge")

    # 4. Assemble the package: tokenizer/config files + model.circle in out/.
    # The source directory is the one that carries tokenizer.json.
    source_dir = None
    for d in os.listdir("."):
        if os.path.isdir(d) and os.path.exists(os.path.join(d, "tokenizer.json")):
            source_dir = d
            break

    if source_dir:
        package_dir = "out"
        print(f"Creating package directory {package_dir}...")
        os.makedirs(package_dir, exist_ok=True)

        # Copy tokenizer and config files (tokenizer.model may be absent
        # for some models, hence the existence check).
        for filename in ["tokenizer.json", "tokenizer.model", "config.json"]:
            src = os.path.join(source_dir, filename)
            if os.path.exists(src):
                shutil.copy2(src, package_dir)

        # Move model.circle
        print(f"Moving model.circle to {package_dir}...")
        shutil.move("model.circle", os.path.join(package_dir, "model.circle"))
    else:
        print(
            "Warning: Could not find source directory (directory with tokenizer.json). Leaving model.circle in current dir."
        )


if __name__ == "__main__":
main()
61 changes: 61 additions & 0 deletions tools/opm/import.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python3
import sys
import os
import argparse
import subprocess
import httpx
import urllib.request
from huggingface_hub import snapshot_download
from config import BUILD_DIR


def main():
    """Download a HuggingFace model into BUILD_DIR and install its requirements.

    The positional argument is a HuggingFace model ID (e.g.
    Maykeye/TinyLLama-v0); `-r/--requirements` optionally points at a
    requirements file with the model's extra dependencies (defaults to
    requirements.txt). Prints an error and returns early when the
    requirements file is missing.
    """
    parser = argparse.ArgumentParser(description="Import model from HuggingFace")
    parser.add_argument("model_id", help="HuggingFace model ID")
    parser.add_argument(
        "-r",
        "--requirements",
        help="Path to requirements file to install (default: requirements.txt)")
    args = parser.parse_args()

    model_id = args.model_id
    # The last path component, lowercased, becomes the local directory name.
    model_basename = model_id.split("/")[-1].lower()

    # Create build directory
    os.makedirs(BUILD_DIR, exist_ok=True)

    # Download into build directory
    target_dir = os.path.join(BUILD_DIR, model_basename)
    print(f"Downloading model files for {model_id} into {target_dir}...")

    # Monkey-patch httpx.Client so huggingface_hub's downloads pick up local
    # proxy settings and skip certificate verification.
    original_client = httpx.Client
    proxy = urllib.request.getproxies()

    def patched_client(*args, **kwargs):
        # SECURITY NOTE(review): SSL verification is disabled for *every*
        # download (intended for self-signed proxy certificates). Consider
        # making this opt-in, e.g. via an environment variable.
        kwargs.setdefault('verify', False)
        # If a proxy is defined, pass it to the client.
        # NOTE(review): httpx >= 0.26 removed the 'proxies' kwarg in favor
        # of 'proxy'/'mounts' — confirm against the pinned httpx version.
        if proxy:
            kwargs.setdefault('proxies', proxy)
        return original_client(*args, **kwargs)

    httpx.Client = patched_client

    try:
        snapshot_download(repo_id=model_id, local_dir=target_dir)
    finally:
        # Always restore the original client, even if the download fails.
        httpx.Client = original_client

    # Install model-specific requirements.
    requirements_file = args.requirements if args.requirements else "requirements.txt"

    requirements_path = os.path.abspath(requirements_file)
    if not os.path.exists(requirements_path):
        print(f"Error: {requirements_path} not found.")
        return

    # BUG FIX: the requirements file was located and validated but never
    # actually installed (the `subprocess` import was unused).
    print(f"Installing requirements from {requirements_path}...")
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", requirements_path],
        check=True)


if __name__ == "__main__":
main()
77 changes: 77 additions & 0 deletions tools/opm/init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import os
import shutil
import subprocess
import venv
from config import VENV_DIR, O2O_DIR, BUILD_DIR
from util import run_cmd

# PR-related constants (used only in init.py)
# These pin the ONE-repository pull request from which the o2o tools are
# fetched via a temporary sparse-checkout worktree (see main()).
PR_WORKTREE = "_pr_16233"  # temporary worktree directory under BUILD_DIR
PR_BRANCH = "pr-16233"  # local branch name created for the fetched PR head
PR_REF = "refs/pull/16233/head"  # remote ref of the PR on github.com/Samsung/ONE
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



def main():
    """Initialize the opm environment.

    Creates a virtual environment, installs a CPU-only torch plus tico into
    it, fetches the o2o tools from a pinned ONE-repository pull request via a
    temporary sparse-checkout git worktree, moves them into O2O_DIR, and
    cleans the worktree up. Assumes it runs inside a git checkout and on a
    POSIX system (uses venv/bin/pip and chmod).
    """
    # 1. Create virtual environment
    if not os.path.exists(VENV_DIR):
        print(f"Creating virtual environment at {VENV_DIR}...")
        venv.create(VENV_DIR, with_pip=True)
    else:
        print(f"Virtual environment already exists at {VENV_DIR}.")

    # 2. Install opm requirements
    # POSIX-only layout; on Windows this would be venv/Scripts/pip.
    pip_cmd = os.path.join(VENV_DIR, "bin", "pip")
    # By installing Torch (cpu), it prevents TICO from pulling the large CUDA-enabled PyTorch package
    run_cmd(f"{pip_cmd} install torch --index-url https://download.pytorch.org/whl/cpu")
    run_cmd(f"{pip_cmd} install tico==0.1.0.dev251125")

    # 3. Prepare build directory for temporary worktree
    os.makedirs(BUILD_DIR, exist_ok=True)
    worktree_path = os.path.join(BUILD_DIR, PR_WORKTREE)

    # 4. Git worktree for PR and o2o extraction
    if not os.path.exists(worktree_path):
        # Fetch PR only if worktree doesn't exist
        try:
            run_cmd(f"git fetch https://github.com/Samsung/ONE.git {PR_REF}:{PR_BRANCH}")
        except subprocess.CalledProcessError:
            # Best-effort: a failed fetch usually means the local branch
            # already exists from a previous run, so keep going.
            print("Fetch failed, possibly branch already exists. Continuing...")

        # Create worktree with no checkout (-f forces reuse of the branch;
        # --no-checkout defers populating files until sparse-checkout is set)
        run_cmd(f"git worktree add --no-checkout -f {worktree_path} {PR_BRANCH}")

        # Configure sparse checkout so only tools/o2o is materialized
        cwd = os.getcwd()
        try:
            os.chdir(worktree_path)
            run_cmd("git sparse-checkout init --cone")
            run_cmd(f"git sparse-checkout set tools/{O2O_DIR}")
            # Populate files
            run_cmd(f"git checkout {PR_BRANCH}")
        finally:
            # Always return to the original directory, even on failure.
            os.chdir(cwd)

    # 5. Move o2o to current directory (skipped when already present, so
    # re-running init is cheap)
    if not os.path.exists(O2O_DIR):
        src_o2o = os.path.join(worktree_path, "tools", O2O_DIR)
        if os.path.exists(src_o2o):
            print(f"Moving o2o tools to {O2O_DIR}...")
            shutil.move(src_o2o, O2O_DIR)
        else:
            print("o2o tools not found in worktree.")

    # 6. Remove temporary worktree
    if os.path.exists(worktree_path):
        print("Removing temporary worktree...")
        run_cmd(f"git worktree remove --force {worktree_path}")

    # 7. Make tools executable
    # NOTE(review): the glob is expanded by the shell inside run_cmd —
    # presumably run_cmd uses shell=True; verify in util.py.
    if os.path.exists(O2O_DIR):
        run_cmd(f"chmod +x {O2O_DIR}/*.py")

    print("opm init completed.")


if __name__ == "__main__":
main()
Loading