-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_readme_examples.py
More file actions
129 lines (105 loc) · 3.36 KB
/
test_readme_examples.py
File metadata and controls
129 lines (105 loc) · 3.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Test all code examples from README.md to ensure they work correctly.
"""
import sys
import os
import sysconfig

# Add paths
# Make the locally built extension and the source tree importable, relative
# to the current working directory. The original Windows/CPython 3.13 build
# directory is kept, and the directory for the running interpreter is added
# as well so the script also finds a build made on another platform/version.
# NOTE(review): the "lib.<platform>-<cache_tag>" pattern is assumed to match
# the build directory naming used by the project's build backend - confirm.
_build_root = os.path.join(os.getcwd(), "build")
_build_dir_names = [
    "lib.win-amd64-cpython-313",
    f"lib.{sysconfig.get_platform()}-{sys.implementation.cache_tag}",
]
# dict.fromkeys de-duplicates while keeping order, so on Windows/CPython 3.13
# exactly one entry is inserted, as before.
for _build_dir_name in dict.fromkeys(_build_dir_names):
    sys.path.insert(0, os.path.join(_build_root, _build_dir_name))
# Inserted last so that "src" ends up first on sys.path, ahead of build dirs.
sys.path.insert(0, os.path.join(os.getcwd(), "src"))
# Opening banner: rule, title, rule, then one blank line.
print("=" * 70, "TESTING README CODE EXAMPLES", "=" * 70, "", sep="\n")
# Test 1: Quick Start Example
# Loads the "lite" profile, tokenizes a short code snippet and, when the
# vocabulary object exposes decode(), decodes the tokens again.
print("[TEST 1] Quick Start - Load Profile and Tokenize")
print("-" * 70)
try:
    from crayon.core.vocabulary import CrayonVocab

    vocab = CrayonVocab(device="auto")
    vocab.load_profile("lite")

    # Tokenize specialized syntax
    code_snippet = "fn main() { println!(\"Hello, World!\"); }"
    tokens = vocab.tokenize(code_snippet)

    # Check if decode works.
    # Probe for the attribute instead of catching AttributeError around the
    # call: an AttributeError raised *inside* an existing decode() is a real
    # failure and must reach the outer handler rather than being reported as
    # "not implemented yet".
    decode_fn = getattr(vocab, "decode", None)
    if decode_fn is None:
        print("⚠ WARNING: vocab.decode() not implemented yet")
        print(f"✓ Tokenize works: {tokens}")
        print("✓ TEST PARTIALLY PASSED")
    else:
        decoded = decode_fn(tokens)
        print(f"✓ Tokenize: {code_snippet}")
        print(f"✓ Tokens: {tokens}")
        print(f"✓ Decoded: {decoded}")
        print("✓ TEST PASSED")
except Exception as e:
    # Script-level boundary: report the failure and keep running later tests.
    print(f"✗ TEST FAILED: {e}")
    import traceback
    traceback.print_exc()
print()
# Test 2: Load different profiles
# Creates a fresh CrayonVocab per profile name and reports each load result
# independently, so one failing profile does not hide the other.
print("[TEST 2] Load Different Profiles")
print("-" * 70)
for profile_name in ["lite", "standard"]:
    try:
        # Imported here as well so this test does not depend on Test 1's
        # import having succeeded; otherwise a missing package would show up
        # as a misleading NameError for every profile.
        from crayon.core.vocabulary import CrayonVocab

        vocab = CrayonVocab(device="auto")
        vocab.load_profile(profile_name)
        print(f"✓ Loaded '{profile_name}' profile")
    except Exception as e:
        print(f"✗ Failed to load '{profile_name}': {e}")
print()
# Test 3: DAT Builder Example
# Builds a DAT structure from a four-word vocabulary, saves it to a file in
# the system temp directory, reports the builder's size, and removes the file.
print("[TEST 3] Compile Vocabulary to DAT Format")
print("-" * 70)
try:
    import tempfile

    from crayon.c_ext.dat_builder import DATBuilder

    # Use a small test vocab
    test_vocab = ["hello", "world", "test", "python"]

    # Compile to DAT
    builder = DATBuilder()
    builder.build(test_vocab)

    # Save to temp file
    dat_path = os.path.join(tempfile.gettempdir(), "test_readme.dat")
    try:
        builder.save(dat_path)
        print(f"✓ Built DAT with {builder.size} nodes")
        print(f"✓ Saved to {dat_path}")
    except Exception:
        # Best-effort cleanup so a failure after save() does not leave the
        # artifact behind; the original error is what gets reported.
        try:
            os.unlink(dat_path)
        except OSError:
            pass
        raise
    else:
        # Strict on the success path: if save() produced no file, this raises
        # and the test is reported as failed.
        os.unlink(dat_path)
    print("✓ TEST PASSED")
except Exception as e:
    # Script-level boundary: report the failure and keep running later tests.
    print(f"✗ TEST FAILED: {e}")
    import traceback
    traceback.print_exc()
print()
# Test 4: Direct C++ Engine Access
# Builds a small DAT file, maps it read-only, passes the mapping to
# crayon_fast.load_dat(), and tokenizes a sentence with crayon_fast.tokenize().
print("[TEST 4] Direct C++ Engine Access")
print("-" * 70)
try:
    import mmap
    import tempfile

    from crayon.c_ext import crayon_fast
    from crayon.c_ext.dat_builder import DATBuilder

    # Build a small DAT
    test_vocab = ["the", "quick", "brown", "fox"]
    builder = DATBuilder()
    builder.build(test_vocab)
    dat_path = os.path.join(tempfile.gettempdir(), "test_engine.dat")

    mm = None
    try:
        builder.save(dat_path)

        # Zero-copy load via mmap
        with open(dat_path, "rb") as f:
            mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        size = crayon_fast.load_dat(mm)

        # Ultra-fast tokenization
        tokens = crayon_fast.tokenize("the quick brown fox")
        print(f"✓ Loaded DAT: {size} nodes")
        print(f"✓ Tokenized: {tokens}")
    finally:
        # Release the mapping before deleting: an open mapping keeps the file
        # in use, which blocks os.unlink() on Windows.
        # NOTE(review): assumes crayon_fast is not used again after this test;
        # confirm load_dat() does not keep a raw pointer into the mapping.
        if mm is not None:
            try:
                mm.close()
            except BufferError:
                # The extension still holds a buffer export on the mapping;
                # leave it open rather than invalidate memory it may read.
                pass
        try:
            os.unlink(dat_path)
        except FileNotFoundError:
            # save() never got as far as creating the file.
            pass
    print("✓ TEST PASSED")
except Exception as e:
    # Script-level boundary: report the failure instead of aborting the run.
    print(f"✗ TEST FAILED: {e}")
    import traceback
    traceback.print_exc()
print()
# Closing banner: rule, completion message, rule.
print("=" * 70, "README CODE TESTS COMPLETE", "=" * 70, sep="\n")