Skip to content

Commit c953302

Browse files
V4.2.6: Smart Arch Detection + Serial Build + Realtime Colab Logs
1 parent 293c9c2 commit c953302

File tree

4 files changed

+76
-34
lines changed

4 files changed

+76
-34
lines changed

Crayon_Colab_Notebook.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
XERV CRAYON V4.2.5 - Production Omni-Backend Tokenizer
2+
XERV CRAYON V4.2.6 - Production Omni-Backend Tokenizer
33
=======================================================
44
Copy this ENTIRE script into a Google Colab cell and run it.
55
@@ -13,7 +13,7 @@
1313
import time
1414

1515
print("=" * 70)
16-
print("XERV CRAYON V4.2.5 INSTALLATION")
16+
print("XERV CRAYON V4.2.6 INSTALLATION")
1717
print("=" * 70)
1818

1919
# 1. Environment Check
@@ -23,6 +23,7 @@
2323
print(f" PyTorch: {torch.__version__}")
2424
if torch.cuda.is_available():
2525
print(f" CUDA: {torch.version.cuda} ({torch.cuda.get_device_name(0)})")
26+
print(" * Smart Build: Will compile ONLY for this GPU architecture")
2627
else:
2728
print(" CUDA: Not available (CPU only)")
2829
except ImportError:
@@ -62,29 +63,36 @@
6263
print(f" {v_check.stdout.strip()}")
6364

6465

65-
# 5. Build & Install
66-
print("\n[5/7] Compiling and Installing (this may take 2-3 minutes)...")
66+
# 5. Build & Install (Streaming Output)
67+
print("\n[5/7] Compiling and Installing (Streaming Logs)...")
6768
print("-" * 70)
6869

6970
build_env = os.environ.copy()
build_env["MAX_JOBS"] = "1"  # Force serial build to prevent OOM on Colab free tier
# BUG FIX: environment variable names are case-sensitive on Linux. The build
# toolchain (torch.utils.cpp_extension / setup.py) looks up CUDA_HOME, so a
# lowercase "cuda_home" key is silently ignored and the toolkit path is lost.
build_env["CUDA_HOME"] = "/usr/local/cuda"

# Stream pip/nvcc output line-by-line so Colab shows build progress in real
# time instead of buffering everything until the (slow) compile finishes.
cmd = [sys.executable, "-m", "pip", "install", "-v", "--no-build-isolation", clone_dir]
process = subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # merge stderr so compiler errors appear in order
    env=build_env,
    text=True,
)

# Echo each line as it arrives. readline() returns "" only at EOF, so we also
# check poll() to stop once the process has exited and the pipe is drained.
while True:
    line = process.stdout.readline()
    if not line and process.poll() is not None:
        break
    if line:
        print(line.rstrip())

rc = process.wait()  # reap the child and obtain the definitive exit code
print("-" * 70)

if rc != 0:
    print("\n" + "!" * 70)
    print("FATAL ERROR: Installation failed!")
    print(f"Exit Code: {rc}")
    print("!" * 70)
    sys.exit(1)

86-
print("-" * 70)
87-
8896

8997
# 6. Verification
9098
print("\n[6/7] Verifying installation...")
@@ -95,7 +103,7 @@
95103

96104
try:
97105
import crayon
98-
print(f" Succcess! Installed version: {crayon.get_version()}")
106+
print(f" Success! Installed version: {crayon.get_version()}")
99107
backends = crayon.check_backends()
100108
print(f" Backends: {backends}")
101109
except ImportError as e:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "xerv-crayon"
7-
version = "4.2.5"
7+
version = "4.2.6"
88
description = "Omni-Backend Tokenizer - CPU (AVX2/512), CUDA (NVIDIA), ROCm (AMD) with automatic hardware detection"
99
readme = "README.md"
1010
requires-python = ">=3.10"

setup.py

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
Features:
66
- PyTorch CUDAExtension for reliable NVCC compilation
77
- Automatic fallback to CPU if CUDA/ROCm unavailable
8-
- MAX_JOBS control to prevent OOM on smaller instances
8+
- Smart Architecture Detection: Compiles only for the active GPU to save RAM/Time
9+
- MAX_JOBS control to prevent OOM
910
"""
1011

1112
import os
@@ -18,14 +19,14 @@
1819
# VERSION
1920
# ============================================================================
2021

21-
VERSION = "4.2.5"
22+
VERSION = "4.2.6"
2223

2324
# ============================================================================
2425
# PRE-FLIGHT CHECKS
2526
# ============================================================================
2627

27-
# Control parallelism to prevent OOM
28-
os.environ["MAX_JOBS"] = os.environ.get("MAX_JOBS", "4")
28+
# Default to serial build to prevent OOM on Colab/Free tiers
29+
os.environ["MAX_JOBS"] = os.environ.get("MAX_JOBS", "1")
2930

3031
def log(msg: str, level: str = "INFO") -> None:
3132
print(f"[CRAYON-BUILD] {msg}", flush=True)
@@ -49,6 +50,45 @@ def log(msg: str, level: str = "INFO") -> None:
4950
HAS_ROCM = os.path.exists(os.path.join(ROCM_HOME, "bin", "hipcc"))
5051

5152

53+
# ============================================================================
54+
# ARCHITECTURE SELECTION
55+
# ============================================================================
56+
57+
def get_cuda_arch_flags():
    """Return the NVCC flags that select which CUDA architectures to build.

    If ``CRAYON_GENERIC_BUILD=1``, compile for all common architectures
    (for distributable PyPI wheels).  Otherwise compile ONLY for the locally
    detected GPU, which is much faster and needs far less RAM — important on
    Colab free tiers where multi-arch parallel nvcc invocations can OOM.

    Returns:
        list[str]: base optimization flags plus one or more ``-gencode`` flags.
    """
    base_flags = ["-O3", "-std=c++17", "--expt-relaxed-constexpr"]

    # Generic build for distribution (wheel): cover the common fleet.
    if os.environ.get("CRAYON_GENERIC_BUILD", "0") == "1":
        log("Building for ALL common CUDA architectures (Generic Wheel)")
        return base_flags + [
            "-gencode=arch=compute_70,code=sm_70",  # V100
            "-gencode=arch=compute_75,code=sm_75",  # T4
            "-gencode=arch=compute_80,code=sm_80",  # A100
            "-gencode=arch=compute_86,code=sm_86",  # RTX 3090
            "-gencode=arch=compute_90,code=sm_90",  # H100
            # FIX: also embed PTX for the newest arch so the wheel stays
            # loadable (via driver JIT) on GPUs newer than sm_90; SASS-only
            # binaries fail to load on future architectures.
            "-gencode=arch=compute_90,code=compute_90",
        ]

    # Local build (Colab / user machine): target only the GPU that is present.
    if TORCH_CUDA_AVAILABLE:
        try:
            major, minor = torch.cuda.get_device_capability()
            arch = f"{major}{minor}"
            log(f"Detected GPU: SM {major}.{minor} -> Compiling for sm_{arch} ONLY")
            return base_flags + [f"-gencode=arch=compute_{arch},code=sm_{arch}"]
        except Exception as e:
            # Detection can fail (e.g. driver present but no visible device);
            # fall through to the safe default below rather than aborting.
            log(f"Error detecting GPU capability: {e}. Falling back to common archs.")

    # Fallback if detection fails or no GPU is present (but CUDA_HOME exists).
    return base_flags + [
        "-gencode=arch=compute_75,code=sm_75",  # T4 (safe default for Colab)
    ]
91+
5292
# ============================================================================
5393
# EXTENSION CONFIGURATION
5494
# ============================================================================
@@ -73,24 +113,18 @@ def log(msg: str, level: str = "INFO") -> None:
73113

74114
# --- 2. CUDA Extension (via PyTorch) ---
75115
if TORCH_CUDA_AVAILABLE and not FORCE_CPU and CUDAExtension:
76-
log(f"Configuring CUDA extension (PyTorch {torch.__version__}, CUDA {torch.version.cuda})")
116+
nvcc_flags = get_cuda_arch_flags()
117+
log(f"Configuring CUDA extension (max_jobs={os.environ['MAX_JOBS']})")
118+
77119
ext_modules.append(CUDAExtension(
78120
name="crayon.c_ext.crayon_cuda",
79121
sources=["src/crayon/c_ext/gpu_engine_cuda.cu"],
80122
extra_compile_args={
81123
"cxx": ["-O3", "-std=c++17"],
82-
"nvcc": [
83-
"-O3", "-std=c++17",
84-
"--expt-relaxed-constexpr",
85-
# Broad architecture support
86-
"-gencode=arch=compute_70,code=sm_70",
87-
"-gencode=arch=compute_75,code=sm_75",
88-
"-gencode=arch=compute_80,code=sm_80",
89-
"-gencode=arch=compute_86,code=sm_86",
90-
"-gencode=arch=compute_90,code=sm_90",
91-
],
124+
"nvcc": nvcc_flags,
92125
},
93126
))
127+
94128
elif not FORCE_CPU and CUDAExtension:
95129
log("Skipping CUDA extension (PyTorch CUDA not found or CUDA_HOME missing)")
96130

src/crayon/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
from __future__ import annotations
4747

48-
__version__ = "4.2.5"
48+
__version__ = "4.2.6"
4949
__author__ = "Xerv Research Engineering Division"
5050

5151
# ============================================================================

0 commit comments

Comments
 (0)