Skip to content

Commit 9547541

Browse files
V4.2.2: Fixed CUDA compilation - proper nvcc integration for T4/V100/A100
1 parent 110f8eb commit 9547541

File tree

4 files changed: +193 additions, -267 deletions

Crayon_Colab_Notebook.py

Lines changed: 69 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,51 @@
11
"""
2-
XERV CRAYON V4.2.0 - Omni-Backend Tokenizer
2+
XERV CRAYON V4.2.2 - Omni-Backend Tokenizer
33
============================================
44
Copy this entire file into Google Colab and run all cells.
55
Works on CPU, NVIDIA GPU (T4/V100/A100), and AMD GPU.
6+
7+
IMPORTANT: Enable GPU runtime for best performance:
8+
Runtime -> Change runtime type -> GPU
69
"""
710

811
# ═══════════════════════════════════════════════════════════════════════════════
9-
# CELL 1: INSTALL CRAYON (WITH AUTOMATIC GPU DETECTION)
12+
# CELL 1: INSTALL CRAYON (ALWAYS BUILDS FROM SOURCE FOR GPU SUPPORT)
1013
# ═══════════════════════════════════════════════════════════════════════════════
1114

1215
import subprocess
1316
import sys
1417
import os
1518

16-
def detect_gpu():
17-
try:
18-
result = subprocess.run(["nvidia-smi"], capture_output=True, timeout=10)
19-
return result.returncode == 0
20-
except:
21-
return False
22-
23-
has_gpu = detect_gpu()
24-
print(f"GPU Detected: {has_gpu}")
25-
26-
if has_gpu:
27-
print("Building from source with CUDA support...")
28-
os.system("rm -rf /tmp/crayon 2>/dev/null")
29-
result = os.system("git clone --depth 1 https://github.com/Electroiscoding/CRAYON.git /tmp/crayon 2>/dev/null")
30-
if result == 0:
31-
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "--no-build-isolation", "/tmp/crayon"])
19+
print("Detecting hardware...")
20+
try:
21+
result = subprocess.run(["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
22+
capture_output=True, text=True, timeout=10)
23+
if result.returncode == 0:
24+
gpu_name = result.stdout.strip()
25+
print(f"GPU Found: {gpu_name}")
3226
else:
33-
print("Git clone failed, installing from TestPyPI (CPU only)")
34-
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
35-
"-i", "https://test.pypi.org/simple/",
36-
"--extra-index-url", "https://pypi.org/simple/", "xerv-crayon"])
37-
else:
38-
print("Installing pre-built CPU version...")
39-
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
40-
"-i", "https://test.pypi.org/simple/",
41-
"--extra-index-url", "https://pypi.org/simple/", "xerv-crayon"])
27+
gpu_name = None
28+
print("No NVIDIA GPU detected")
29+
except:
30+
gpu_name = None
31+
print("No NVIDIA GPU detected")
32+
33+
print("Installing Crayon from source (with GPU compilation if available)...")
34+
os.system("rm -rf /tmp/crayon 2>/dev/null")
35+
os.system("git clone --depth 1 https://github.com/Electroiscoding/CRAYON.git /tmp/crayon")
36+
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "--no-build-isolation", "/tmp/crayon"])
4237

4338
import crayon
4439
print(f"Crayon v{crayon.get_version()} installed")
45-
print(f"Available backends: {crayon.check_backends()}")
40+
backends = crayon.check_backends()
41+
print(f"Available backends: {backends}")
42+
43+
if gpu_name and not backends.get("cuda"):
44+
print("WARNING: GPU detected but CUDA backend not available.")
45+
print("This may be due to compilation issues. Check build logs above.")
4646

4747
# ═══════════════════════════════════════════════════════════════════════════════
48-
# CELL 2: INITIALIZE TOKENIZER (AUTO-DETECTS GPU)
48+
# CELL 2: INITIALIZE TOKENIZER
4949
# ═══════════════════════════════════════════════════════════════════════════════
5050

5151
from crayon import CrayonVocab
@@ -54,9 +54,9 @@ def detect_gpu():
5454
vocab.load_profile("lite")
5555

5656
info = vocab.get_info()
57-
print(f"Device: {info['device'].upper()}")
57+
print(f"Active Device: {info['device'].upper()}")
5858
print(f"Backend: {info['backend']}")
59-
print(f"Vocab Size: {vocab.vocab_size:,} tokens")
59+
print(f"Vocabulary Size: {vocab.vocab_size:,} tokens")
6060

6161
# ═══════════════════════════════════════════════════════════════════════════════
6262
# CELL 3: BASIC TOKENIZATION
@@ -67,118 +67,91 @@ def detect_gpu():
6767

6868
print(f"Input: {text}")
6969
print(f"Tokens: {tokens}")
70-
print(f"Count: {len(tokens)}")
71-
72-
# ═══════════════════════════════════════════════════════════════════════════════
73-
# CELL 4: BATCH TOKENIZATION
74-
# ═══════════════════════════════════════════════════════════════════════════════
75-
76-
batch = [
77-
"The quick brown fox jumps over the lazy dog.",
78-
"Machine learning powers modern AI systems.",
79-
"def forward(self, x): return torch.relu(x)",
80-
]
81-
82-
batch_tokens = vocab.tokenize(batch)
83-
84-
for i, (text, toks) in enumerate(zip(batch, batch_tokens)):
85-
print(f"[{i+1}] {text[:40]}... -> {len(toks)} tokens")
70+
print(f"Token Count: {len(tokens)}")
8671

8772
# ═══════════════════════════════════════════════════════════════════════════════
88-
# CELL 5: LATENCY BENCHMARK
73+
# CELL 4: LATENCY BENCHMARK (SINGLE STRING)
8974
# ═══════════════════════════════════════════════════════════════════════════════
9075

9176
import time
9277

93-
text = "Crayon optimizes tokenization at the silicon level with AVX2 SIMD."
78+
text = "The quick brown fox jumps over the lazy dog."
9479
iterations = 10000
9580

9681
for _ in range(100):
97-
_ = vocab.tokenize(text)
82+
vocab.tokenize(text)
9883

9984
start = time.perf_counter()
10085
for _ in range(iterations):
101-
_ = vocab.tokenize(text)
86+
vocab.tokenize(text)
10287
elapsed = time.perf_counter() - start
10388

10489
latency_us = (elapsed / iterations) * 1_000_000
105-
print(f"Latency: {latency_us:.2f} us/call")
106-
print(f"Throughput: {iterations / elapsed:,.0f} calls/sec")
90+
print(f"Single-String Latency: {latency_us:.2f} microseconds")
91+
print(f"Calls per Second: {iterations / elapsed:,.0f}")
10792

10893
# ═══════════════════════════════════════════════════════════════════════════════
109-
# CELL 6: BATCH THROUGHPUT BENCHMARK
94+
# CELL 5: BATCH THROUGHPUT BENCHMARK
11095
# ═══════════════════════════════════════════════════════════════════════════════
11196

11297
base_text = "The quick brown fox jumps over the lazy dog."
11398

114-
for batch_size in [100, 1000, 10000]:
99+
print("\nBatch Throughput Results:")
100+
print("-" * 60)
101+
102+
for batch_size in [100, 1000, 10000, 50000]:
115103
batch = [base_text] * batch_size
116104

117-
_ = vocab.tokenize(batch[:10])
105+
vocab.tokenize(batch[:10])
118106

119107
start = time.time()
120108
results = vocab.tokenize(batch)
121109
duration = time.time() - start
122110

123111
total_tokens = sum(len(r) for r in results)
112+
docs_per_sec = batch_size / duration
113+
tokens_per_sec = total_tokens / duration
124114

125-
print(f"Batch {batch_size:>5}: {batch_size/duration:>10,.0f} docs/sec | {total_tokens/duration:>12,.0f} tokens/sec")
115+
print(f"Batch {batch_size:>6}: {docs_per_sec:>12,.0f} docs/sec | {tokens_per_sec:>14,.0f} tokens/sec")
126116

127117
# ═══════════════════════════════════════════════════════════════════════════════
128-
# CELL 7: GPU STRESS TEST (100K DOCUMENTS)
118+
# CELL 6: GPU STRESS TEST (IF AVAILABLE)
129119
# ═══════════════════════════════════════════════════════════════════════════════
130120

131121
if vocab.device != "cpu":
132-
batch_size = 100_000
133-
batch = ["The quick brown fox jumps over the lazy dog."] * batch_size
134-
135-
print(f"Processing {batch_size:,} documents on {vocab.device.upper()}...")
122+
print(f"\nGPU Stress Test ({vocab.device.upper()}):")
123+
print("-" * 60)
136124

137-
start = time.time()
138-
results = vocab.tokenize(batch)
139-
duration = time.time() - start
140-
141-
total_tokens = sum(len(r) for r in results)
142-
143-
print(f"Duration: {duration:.4f}s")
144-
print(f"Throughput: {batch_size/duration:,.0f} docs/sec")
145-
print(f"Token Rate: {total_tokens/duration:,.0f} tokens/sec")
125+
for batch_size in [10000, 50000, 100000]:
126+
batch = [base_text] * batch_size
127+
128+
start = time.time()
129+
results = vocab.tokenize(batch)
130+
duration = time.time() - start
131+
132+
total_tokens = sum(len(r) for r in results)
133+
print(f"Batch {batch_size:>6}: {batch_size/duration:>12,.0f} docs/sec | {total_tokens/duration:>14,.0f} tokens/sec in {duration:.3f}s")
146134
else:
147-
print("Skipping GPU stress test (running on CPU)")
148-
print("Enable GPU: Runtime -> Change runtime type -> GPU")
135+
print("\nGPU stress test skipped (running on CPU)")
136+
print("To enable GPU: Runtime -> Change runtime type -> GPU")
149137

150138
# ═══════════════════════════════════════════════════════════════════════════════
151-
# CELL 8: PROFILE SWITCHING
152-
# ═══════════════════════════════════════════════════════════════════════════════
153-
154-
code = "def forward(self, x): return torch.matmul(x, w)"
155-
156-
tokens_lite = vocab.tokenize(code)
157-
print(f"[LITE] {len(tokens_lite)} tokens")
158-
159-
try:
160-
with vocab.using_profile("code"):
161-
tokens_code = vocab.tokenize(code)
162-
print(f"[CODE] {len(tokens_code)} tokens")
163-
except FileNotFoundError:
164-
print("[CODE] Profile not available")
165-
166-
# ═══════════════════════════════════════════════════════════════════════════════
167-
# CELL 9: ENCODE/DECODE ROUND-TRIP
139+
# CELL 7: ENCODE/DECODE ROUND-TRIP
168140
# ═══════════════════════════════════════════════════════════════════════════════
169141

170142
original = "Hello, Crayon!"
171143
tokens = vocab.tokenize(original)
172144
decoded = vocab.decode(tokens)
173145

174-
print(f"Original: {original}")
175-
print(f"Tokens: {tokens}")
176-
print(f"Decoded: {decoded}")
177-
print(f"Match: {original == decoded}")
146+
print(f"\nRound-Trip Test:")
147+
print(f" Original: {original}")
148+
print(f" Tokens: {tokens}")
149+
print(f" Decoded: {decoded}")
150+
print(f" Match: {original == decoded}")
178151

179152
# ═══════════════════════════════════════════════════════════════════════════════
180-
# CELL 10: CLEANUP
153+
# CELL 8: CLEANUP
181154
# ═══════════════════════════════════════════════════════════════════════════════
182155

183156
vocab.close()
184-
print("Done!")
157+
print("\nDone!")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "xerv-crayon"
7-
version = "4.2.1"
7+
version = "4.2.2"
88
description = "Omni-Backend Tokenizer - CPU (AVX2/512), CUDA (NVIDIA), ROCm (AMD) with automatic hardware detection"
99
readme = "README.md"
1010
requires-python = ">=3.10"

0 commit comments

Comments (0)