Skip to content

Commit 3ba78a6

Browse files
V4.2.4: PyTorch CUDAExtension for production CUDA compilation
1 parent 82fd343 commit 3ba78a6

File tree

2 files changed

+144
-185
lines changed

2 files changed

+144
-185
lines changed

Crayon_Colab_Notebook.py

Lines changed: 77 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""
2-
XERV CRAYON V4.2.4 - Omni-Backend Tokenizer
3-
=============================================
2+
XERV CRAYON V4.2.4 - Production Omni-Backend Tokenizer
3+
=======================================================
44
Copy this ENTIRE script into a Google Colab cell and run it.
55
66
IMPORTANT: Enable GPU runtime first:
77
Runtime -> Change runtime type -> GPU (T4/V100/A100)
8+
9+
This version uses PyTorch's CUDAExtension for reliable CUDA compilation.
810
"""
911

1012
import subprocess
@@ -13,10 +15,9 @@
1315
import time
1416

1517
print("=" * 70)
16-
print("XERV CRAYON INSTALLATION V4.2.4")
18+
print("XERV CRAYON V4.2.4 INSTALLATION")
1719
print("=" * 70)
1820

19-
# Step 1: GPU Detection
2021
print("\n[1/7] Detecting GPU hardware...")
2122
try:
2223
result = subprocess.run(["nvidia-smi", "--query-gpu=name,compute_cap", "--format=csv,noheader"],
@@ -32,80 +33,71 @@
3233
print(" No NVIDIA GPU detected")
3334
has_gpu = False
3435

35-
# Step 2: NVCC Detection
3636
print("\n[2/7] Checking CUDA compiler...")
3737
nvcc_check = subprocess.run(["which", "nvcc"], capture_output=True, text=True)
3838
if nvcc_check.returncode == 0:
39-
nvcc_path = nvcc_check.stdout.strip()
40-
print(f" NVCC: {nvcc_path}")
41-
has_nvcc = True
39+
print(f" NVCC: {nvcc_check.stdout.strip()}")
4240
else:
4341
print(" NVCC not found")
44-
has_nvcc = False
4542

46-
# Step 3: Ensure PyTorch is installed (required for CUDAExtension)
47-
print("\n[3/7] Checking PyTorch...")
48-
try:
49-
import torch
50-
print(f" PyTorch {torch.__version__}")
51-
print(f" CUDA available: {torch.cuda.is_available()}")
52-
except ImportError:
53-
print(" Installing PyTorch...")
54-
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "torch"])
55-
import torch
56-
print(f" PyTorch {torch.__version__} installed")
57-
58-
# Step 4: Clean ALL Caches
59-
print("\n[4/7] Cleaning ALL caches...")
43+
print("\n[3/7] Ensuring PyTorch with CUDA...")
44+
subprocess.run([sys.executable, "-m", "pip", "install", "-q", "torch"], capture_output=True)
45+
46+
import torch
47+
print(f" PyTorch: {torch.__version__}")
48+
print(f" CUDA available: {torch.cuda.is_available()}")
49+
if torch.cuda.is_available():
50+
print(f" CUDA version: {torch.version.cuda}")
51+
52+
print("\n[4/7] Cleaning caches...")
6053
os.system("pip uninstall -y xerv-crayon crayon 2>/dev/null")
6154
os.system("pip cache purge 2>/dev/null")
6255
os.system("rm -rf /tmp/crayon* ~/.cache/pip 2>/dev/null")
6356
print(" Done")
6457

65-
# Step 5: Fresh Clone
6658
print("\n[5/7] Cloning from GitHub...")
6759
timestamp = int(time.time())
6860
clone_dir = f"/tmp/crayon_{timestamp}"
6961
os.system(f"git clone --depth 1 https://github.com/Electroiscoding/CRAYON.git {clone_dir}")
7062

7163
version_check = subprocess.run(["grep", "-m1", "__version__", f"{clone_dir}/src/crayon/__init__.py"],
7264
capture_output=True, text=True)
73-
print(f" {version_check.stdout.strip()}")
65+
print(f" Source version: {version_check.stdout.strip()}")
7466

75-
# Step 6: Build and Install
76-
print("\n[6/7] Building with CUDA support (this takes ~2 min)...")
67+
print("\n[6/7] Building with PyTorch CUDAExtension...")
7768
print("-" * 70)
7869

79-
env = os.environ.copy()
80-
env["CUDA_HOME"] = "/usr/local/cuda"
70+
build_env = os.environ.copy()
71+
build_env["CUDA_HOME"] = "/usr/local/cuda"
8172

8273
result = subprocess.run(
8374
[sys.executable, "-m", "pip", "install", "-v", "--no-cache-dir", "--no-build-isolation", clone_dir],
84-
env=env
75+
env=build_env
8576
)
8677

8778
print("-" * 70)
8879

89-
# Step 7: Verify
9080
print("\n[7/7] Verifying installation...")
9181

9282
for key in list(sys.modules.keys()):
9383
if "crayon" in key:
9484
del sys.modules[key]
9585

9686
import crayon
97-
print(f"\n Version: {crayon.get_version()}")
87+
print(f"\n Installed version: {crayon.get_version()}")
9888
backends = crayon.check_backends()
9989
print(f" Backends: {backends}")
10090

10191
if backends.get("cuda"):
102-
print(" CUDA: READY", "\u2705")
103-
elif has_gpu and has_nvcc:
104-
print(" WARNING: GPU detected but CUDA not compiled!")
105-
print(" Check build output above for nvcc errors")
92+
print(" CUDA backend: READY", "\u2705")
93+
else:
94+
if has_gpu:
95+
print(" CUDA backend: NOT AVAILABLE (check build logs above)")
96+
else:
97+
print(" CUDA backend: NOT AVAILABLE (no GPU)")
10698

10799
print("\n" + "=" * 70)
108-
print("TOKENIZER TEST")
100+
print("TOKENIZER INITIALIZATION")
109101
print("=" * 70)
110102

111103
from crayon import CrayonVocab
@@ -114,40 +106,78 @@
114106
vocab.load_profile("lite")
115107

116108
info = vocab.get_info()
117-
print(f"\nDevice: {info['device'].upper()}")
109+
print(f"\nActive Device: {info['device'].upper()}")
118110
print(f"Backend: {info['backend']}")
119111
print(f"Vocabulary: {vocab.vocab_size:,} tokens")
120112

121-
text = "Hello, Crayon!"
113+
text = "Hello, Crayon tokenizer!"
122114
tokens = vocab.tokenize(text)
123-
print(f"\nTest: '{text}' -> {tokens}")
115+
print(f"\nQuick Test: '{text}'")
116+
print(f"Tokens: {tokens}")
117+
print(f"Count: {len(tokens)}")
124118

125119
print("\n" + "=" * 70)
126-
print("BENCHMARKS")
120+
print("PERFORMANCE BENCHMARKS")
127121
print("=" * 70)
128122

129123
base_text = "The quick brown fox jumps over the lazy dog."
130124

131-
print("\n--- Throughput ---")
125+
print("\n--- Latency (single string) ---")
126+
iterations = 10000
127+
for _ in range(100):
128+
vocab.tokenize(base_text)
129+
start = time.perf_counter()
130+
for _ in range(iterations):
131+
vocab.tokenize(base_text)
132+
elapsed = time.perf_counter() - start
133+
print(f"Latency: {(elapsed/iterations)*1e6:.2f} us/call")
134+
print(f"Calls/sec: {iterations/elapsed:,.0f}")
135+
136+
print("\n--- Batch Throughput ---")
137+
print(f"{'Batch':>10} | {'Docs/sec':>14} | {'Tokens/sec':>16}")
138+
print("-" * 48)
139+
132140
for batch_size in [1000, 10000, 50000]:
133141
batch = [base_text] * batch_size
134142
vocab.tokenize(batch[:10])
135143
start = time.time()
136144
results = vocab.tokenize(batch)
137145
duration = time.time() - start
138146
total_tokens = sum(len(r) for r in results)
139-
print(f"{batch_size:>8}: {batch_size/duration:>12,.0f} docs/sec | {total_tokens/duration:>14,.0f} tokens/sec")
147+
print(f"{batch_size:>10,} | {batch_size/duration:>14,.0f} | {total_tokens/duration:>16,.0f}")
140148

141149
if vocab.device != "cpu":
142-
print(f"\n--- GPU Stress Test ---")
150+
print(f"\n--- GPU Stress Test ({vocab.device.upper()}) ---")
143151
for batch_size in [100000, 500000]:
144152
batch = [base_text] * batch_size
153+
torch.cuda.synchronize() if torch.cuda.is_available() else None
145154
start = time.time()
146155
results = vocab.tokenize(batch)
156+
torch.cuda.synchronize() if torch.cuda.is_available() else None
147157
duration = time.time() - start
148-
print(f"{batch_size:>8}: {batch_size/duration:>12,.0f} docs/sec in {duration:.3f}s")
158+
total_tokens = sum(len(r) for r in results)
159+
print(f"{batch_size:>10,}: {batch_size/duration:>12,.0f} docs/sec | {duration:.3f}s")
149160

150-
vocab.close()
151161
print("\n" + "=" * 70)
152-
print("DONE!")
162+
print("ENCODE/DECODE VERIFICATION")
153163
print("=" * 70)
164+
165+
test_cases = [
166+
"Hello, world!",
167+
"The quick brown fox.",
168+
"def forward(self, x): return x",
169+
]
170+
171+
all_passed = True
172+
for text in test_cases:
173+
tokens = vocab.tokenize(text)
174+
decoded = vocab.decode(tokens)
175+
passed = text == decoded
176+
all_passed = all_passed and passed
177+
status = "PASS" if passed else "FAIL"
178+
print(f"[{status}] '{text}' -> {len(tokens)} tokens")
179+
180+
print(f"\nAll tests: {'PASSED' if all_passed else 'FAILED'}")
181+
182+
vocab.close()
183+
print("\nDone!")

0 commit comments

Comments
 (0)