"""
XERV CRAYON V4.2.4 - Production Omni-Backend Tokenizer
=======================================================
Copy this ENTIRE script into a Google Colab cell and run it.

IMPORTANT: Enable GPU runtime first:
    Runtime -> Change runtime type -> GPU (T4/V100/A100)

This version uses PyTorch's CUDAExtension for reliable CUDA compilation.
"""
9 | 11 |
|
10 | 12 | import subprocess |
|
13 | 15 | import time |
14 | 16 |
|
# Installation banner for the console log.
print(70 * "=")
print("XERV CRAYON V4.2.4 INSTALLATION")
print(70 * "=")
18 | 20 |
|
19 | | -# Step 1: GPU Detection |
20 | 21 | print("\n[1/7] Detecting GPU hardware...") |
21 | 22 | try: |
22 | 23 | result = subprocess.run(["nvidia-smi", "--query-gpu=name,compute_cap", "--format=csv,noheader"], |
|
32 | 33 | print(" No NVIDIA GPU detected") |
33 | 34 | has_gpu = False |
34 | 35 |
|
35 | | -# Step 2: NVCC Detection |
36 | 36 | print("\n[2/7] Checking CUDA compiler...") |
37 | 37 | nvcc_check = subprocess.run(["which", "nvcc"], capture_output=True, text=True) |
38 | 38 | if nvcc_check.returncode == 0: |
39 | | - nvcc_path = nvcc_check.stdout.strip() |
40 | | - print(f" NVCC: {nvcc_path}") |
41 | | - has_nvcc = True |
| 39 | + print(f" NVCC: {nvcc_check.stdout.strip()}") |
42 | 40 | else: |
43 | 41 | print(" NVCC not found") |
44 | | - has_nvcc = False |
45 | 42 |
|
# [3/7] Make sure PyTorch is importable before the CUDAExtension build.
# Only shell out to pip when torch is actually missing: the previous
# revision invoked `pip install` unconditionally and, because of
# capture_output with an ignored return code, silently swallowed any
# install failure.
print("\n[3/7] Ensuring PyTorch with CUDA...")
try:
    import torch
except ImportError:
    install = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "torch"],
        capture_output=True, text=True,
    )
    if install.returncode != 0:
        print(f" WARNING: pip install torch failed:\n{install.stderr}")
    import torch

print(f" PyTorch: {torch.__version__}")
print(f" CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # torch.version.cuda is the toolkit version the wheel was built with.
    print(f" CUDA version: {torch.version.cuda}")
# [4/7] Remove previously installed copies and stale build artifacts so
# the fresh clone below is the only source pip can install from.
# Shell strings are kept because globs and 2>/dev/null need a shell.
print("\n[4/7] Cleaning caches...")
cleanup_commands = (
    "pip uninstall -y xerv-crayon crayon 2>/dev/null",
    "pip cache purge 2>/dev/null",
    "rm -rf /tmp/crayon* ~/.cache/pip 2>/dev/null",
)
for command in cleanup_commands:
    os.system(command)
print(" Done")
64 | 57 |
|
# [5/7] Fresh clone into a unique /tmp directory (timestamped so reruns
# in the same Colab session never collide with an earlier checkout).
print("\n[5/7] Cloning from GitHub...")
clone_dir = f"/tmp/crayon_{int(time.time())}"
clone_status = os.system(
    f"git clone --depth 1 https://github.com/Electroiscoding/CRAYON.git {clone_dir}"
)
if clone_status != 0:
    # Previously the clone result was ignored; a failed clone made every
    # later step fail with confusing errors.
    print(" WARNING: git clone failed; the build step below cannot succeed")

# Report the version declared in the cloned source. Reading the file in
# Python replaces the external `grep` subprocess and degrades gracefully
# when the clone failed or the layout changed.
try:
    with open(f"{clone_dir}/src/crayon/__init__.py", encoding="utf-8") as init_file:
        version_line = next(
            (line.strip() for line in init_file if "__version__" in line), ""
        )
except OSError:
    version_line = ""
print(f" Source version: {version_line}")
74 | 66 |
|
# [6/7] Build and install the package. --no-build-isolation lets the
# setup script import the torch we just verified (CUDAExtension needs
# it); CUDA_HOME points nvcc at the standard Colab toolkit location.
print("\n[6/7] Building with PyTorch CUDAExtension...")
print("-" * 70)

build_env = os.environ.copy()
build_env["CUDA_HOME"] = "/usr/local/cuda"

build_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-v", "--no-cache-dir",
     "--no-build-isolation", clone_dir],
    env=build_env,
)
# The previous revision never checked the exit status; surface a failed
# build before the verification step tries to import crayon.
if build_result.returncode != 0:
    print(f" WARNING: build failed with exit code {build_result.returncode}")

print("-" * 70)
88 | 79 |
|
# [7/7] Import the freshly installed package and report backend status.
print("\n[7/7] Verifying installation...")

# Drop any cached crayon modules so the import below binds to the new
# install rather than a stale module object from a previous run.
for key in list(sys.modules.keys()):
    if "crayon" in key:
        del sys.modules[key]

import crayon

print(f"\n Installed version: {crayon.get_version()}")
backends = crayon.check_backends()
print(f" Backends: {backends}")

if backends.get("cuda"):
    # Fix: the old call passed a stray empty-string second argument to
    # print() (leftover from a removed emoji), emitting a trailing space.
    print(" CUDA backend: READY")
elif has_gpu:
    print(" CUDA backend: NOT AVAILABLE (check build logs above)")
else:
    print(" CUDA backend: NOT AVAILABLE (no GPU)")
106 | 98 |
|
# Section banner: tokenizer bring-up.
print("\n" + 70 * "=")
print("TOKENIZER INITIALIZATION")
print(70 * "=")
110 | 102 |
|
111 | 103 | from crayon import CrayonVocab |
|
114 | 106 | vocab.load_profile("lite") |
115 | 107 |
|
# Report which backend/device the tokenizer actually selected, then run
# a single-string smoke test.
info = vocab.get_info()
device_name = info["device"].upper()
print(f"\nActive Device: {device_name}")
print(f"Backend: {info['backend']}")
print(f"Vocabulary: {vocab.vocab_size:,} tokens")

sample = "Hello, Crayon tokenizer!"
sample_tokens = vocab.tokenize(sample)
print(f"\nQuick Test: '{sample}'")
print(f"Tokens: {sample_tokens}")
print(f"Count: {len(sample_tokens)}")
124 | 118 |
|
# Section banner plus the document reused by every benchmark below.
print("\n" + 70 * "=")
print("PERFORMANCE BENCHMARKS")
print(70 * "=")

base_text = "The quick brown fox jumps over the lazy dog."
130 | 124 |
|
# Single-string latency: warm up first, then time many sequential calls
# with the monotonic high-resolution clock.
print("\n--- Latency (single string) ---")
iterations = 10000
warmup_calls = 100
for _ in range(warmup_calls):
    vocab.tokenize(base_text)  # prime caches before the timed region
t0 = time.perf_counter()
for _ in range(iterations):
    vocab.tokenize(base_text)
elapsed = time.perf_counter() - t0
print(f"Latency: {(elapsed/iterations)*1e6:.2f} us/call")
print(f"Calls/sec: {iterations/elapsed:,.0f}")
| 135 | + |
# Batch throughput at increasing batch sizes. Timing switched from
# time.time() to time.perf_counter(): perf_counter is monotonic and
# high-resolution, while time.time() can jump with wall-clock
# adjustments (and the latency section above already uses perf_counter).
print("\n--- Batch Throughput ---")
print(f"{'Batch':>10} | {'Docs/sec':>14} | {'Tokens/sec':>16}")
print("-" * 48)

for batch_size in [1000, 10000, 50000]:
    batch = [base_text] * batch_size
    vocab.tokenize(batch[:10])  # warm-up outside the timed region
    start = time.perf_counter()
    results = vocab.tokenize(batch)
    duration = time.perf_counter() - start
    total_tokens = sum(len(r) for r in results)
    print(f"{batch_size:>10,} | {batch_size/duration:>14,.0f} | {total_tokens/duration:>16,.0f}")
140 | 148 |
|
# GPU stress test, skipped on CPU-only installs.
if vocab.device != "cpu":
    print(f"\n--- GPU Stress Test ({vocab.device.upper()}) ---")
    for batch_size in [100000, 500000]:
        batch = [base_text] * batch_size
        # Proper if-statements replace the old conditional-expression
        # trick (`f() if cond else None`), which abused a ternary for its
        # side effect. Synchronizing before/after ensures the timer
        # measures completed GPU work, not just kernel launches.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start = time.perf_counter()
        results = vocab.tokenize(batch)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        duration = time.perf_counter() - start
        # total_tokens was previously computed but never printed.
        total_tokens = sum(len(r) for r in results)
        print(f"{batch_size:>10,}: {batch_size/duration:>12,.0f} docs/sec | "
              f"{total_tokens/duration:,.0f} tokens/sec | {duration:.3f}s")
149 | 160 |
|
# Round-trip check: decode(tokenize(s)) must reproduce s exactly.
print("\n" + 70 * "=")
print("ENCODE/DECODE VERIFICATION")
print(70 * "=")

test_cases = [
    "Hello, world!",
    "The quick brown fox.",
    "def forward(self, x): return x",
]

all_passed = True
for case in test_cases:
    case_tokens = vocab.tokenize(case)
    restored = vocab.decode(case_tokens)
    ok = restored == case
    all_passed &= ok
    print(f"[{'PASS' if ok else 'FAIL'}] '{case}' -> {len(case_tokens)} tokens")

print(f"\nAll tests: {'PASSED' if all_passed else 'FAILED'}")

vocab.close()
print("\nDone!")
0 commit comments