-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplayground_examples.py
More file actions
90 lines (74 loc) · 2.01 KB
/
playground_examples.py
File metadata and controls
90 lines (74 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Default text for the input field; the wording itself asks the user to replace it.
default_user_input = "Replace this text in the input field to see how tokenization works."

# Default identifiers for the first and second tokenizer.
default_tokenizer_name_1 = "openai/gpt-4o"
default_tokenizer_name_2 = "Qwen/Qwen2.5-72B"
# The same addition written twice: once without spaces and once with spaces
# around the operators. The two lines are separated by an empty line and the
# text ends with a newline.
number_example = (
    "127+677=804\n"
    "\n"
    "127 + 677 = 804\n"
)
# Sample text containing a FizzBuzz loop over range(1, 101); the text ends
# with a newline.
# NOTE(review): as written here, the lines inside the literal carry no leading
# indentation, so the snippet is not runnable Python. Confirm whether the
# indentation was lost or is intentionally absent before touching the literal.
code_example = """for i in range(1, 101):
if i % 3 == 0 and i % 5 == 0:
print("FizzBuzz")
elif i % 3 == 0:
print("Fizz")
elif i % 5 == 0:
print("Buzz")
else:
print(i)
"""
# Three spelling / letter-counting questions, one per line, with no trailing
# newline after the last question.
spelling_example = "\n".join(
    (
        'How do you spell "accommodate"?',
        'How many letters are in the word "accommodate"?',
        "How many r's are in the word strawberry?",
    )
)
# Greek sample text made of pairs of near-identical Greek words or spellings.
# Each pair is introduced by a '#'-prefixed English note; those notes are part
# of the string itself, not Python comments. The string opens with a newline
# and ends with one.
# NOTE(review): the "τρενο" / "τραινο" pair has no introductory note line,
# unlike the other pairs — confirm whether one was intended.
greek_example = """
# Both mean 'I am sorry' though the latter one contains accent mark or stress mark
Συγνωμη
Συγνώμη
# Both refer to "bean"
Φασόλι
Φασούλι
# Both refer to "Saturday"
Σάββατο
Σάβατο
# Both translate to 'egg'
Αυγό
Αγβό
# They both translate to grandfather, though the latter is mostly used in Corfu Island
Παππούς
Πάπους
# They mean two completely different things!
Νόνα # refers to grandmother commonly observed in Ionion pelagos
Νονά # refers to godmother in Christianity
# Both refer to something new
καινούριος
καινούργιος
# Both refer to tomato
ντοματα
τοματα
τρενο
τραινο
# Singular / Plural versions of something 'innate'
εγγενής
εγγενείς
"""
# Preset examples keyed by label. Every entry maps to a dict holding the
# sample "text" plus the two tokenizer identifiers to pair it with. All
# entries use the default first tokenizer; only "greek" overrides the second.
examples = {
    label: {
        "text": sample_text,
        "tokenizer_1": default_tokenizer_name_1,
        "tokenizer_2": second_tokenizer,
    }
    for label, sample_text, second_tokenizer in (
        ("number", number_example, default_tokenizer_name_2),
        ("code", code_example, default_tokenizer_name_2),
        ("spelling", spelling_example, default_tokenizer_name_2),
        ("greek", greek_example, "ilsp/Llama-Krikri-8B-Base"),
    )
}