Skip to content

Commit e6d1b6c

Browse files
feat(genai): Add local tokenizer samples for Count and Compute (#13602)
* feat(genai): Add local tokenizer samples for Count and Compute
* feat(genai): Add local tokenizer samples for Count and Compute
1 parent 67e40f7 commit e6d1b6c

File tree

5 files changed

+78
-1
lines changed

5 files changed

+78
-1
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def counttoken_localtokenizer_compute_with_txt() -> list:
    """Tokenize a fixed sample prompt with the local tokenizer.

    Creates a ``LocalTokenizer`` for the ``gemini-2.5-flash`` model, runs
    ``compute_tokens`` on the prompt, and prints the full response.

    Returns:
        The ``tokens_info`` attribute of the response. Per the example output
        below this is a list of ``TokensInfo`` entries (role, token ids and
        token byte strings), not an integer count.
    """
    # [START googlegenaisdk_counttoken_localtokenizer_compute_with_txt]
    from google.genai.local_tokenizer import LocalTokenizer

    tokenizer = LocalTokenizer(model_name="gemini-2.5-flash")
    response = tokenizer.compute_tokens("What's the longest word in the English language?")
    print(response)
    # Example output:
    # tokens_info=[TokensInfo(
    #   role='user',
    #   token_ids=[3689, 236789, 236751, 506,
    #              27801, 3658, 528, 506, 5422, 5192, 236881],
    #   tokens=[b'What', b"'", b's', b' the', b' longest',
    #           b' word', b' in', b' the', b' English', b' language', b'?']
    # )]
    # [END googlegenaisdk_counttoken_localtokenizer_compute_with_txt]
    return response.tokens_info


if __name__ == "__main__":
    counttoken_localtokenizer_compute_with_txt()
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def counttoken_localtokenizer_with_txt() -> int:
    """Count the tokens of a fixed sample prompt with the local tokenizer.

    Creates a ``LocalTokenizer`` for the ``gemini-2.5-flash`` model, runs
    ``count_tokens`` on the prompt, and prints the full response.

    Returns:
        The ``total_tokens`` attribute of the response.
    """
    # [START googlegenaisdk_counttoken_localtokenizer_with_txt]
    from google.genai.local_tokenizer import LocalTokenizer

    prompt = "What's the highest mountain in Africa?"
    local_tokenizer = LocalTokenizer(model_name="gemini-2.5-flash")
    count_result = local_tokenizer.count_tokens(prompt)
    print(count_result)
    # Example output:
    # total_tokens=10
    # [END googlegenaisdk_counttoken_localtokenizer_with_txt]
    return count_result.total_tokens


if __name__ == "__main__":
    counttoken_localtokenizer_with_txt()

genai/count_tokens/counttoken_with_txt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def count_tokens() -> int:
2525
)
2626
print(response)
2727
# Example output:
28-
# total_tokens=10
28+
# total_tokens=9
2929
# cached_content_token_count=None
3030
# [END googlegenaisdk_counttoken_with_txt]
3131
return response.total_tokens
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
google-genai==1.42.0
2+
sentencepiece==0.2.1

genai/count_tokens/test_count_tokens_examples.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import os
2020

2121
import counttoken_compute_with_txt
22+
import counttoken_localtokenizer_compute_with_txt
23+
import counttoken_localtokenizer_with_txt
2224
import counttoken_resp_with_txt
2325
import counttoken_with_txt
2426
import counttoken_with_txt_vid
@@ -43,3 +45,11 @@ def test_counttoken_with_txt() -> None:
4345

4446
def test_counttoken_with_txt_vid() -> None:
    """The text-plus-video token count sample returns a truthy result."""
    result = counttoken_with_txt_vid.count_tokens()
    assert result
48+
49+
50+
def test_counttoken_localtokenizer_with_txt() -> None:
    """The local-tokenizer count sample returns a truthy token total."""
    total_tokens = counttoken_localtokenizer_with_txt.counttoken_localtokenizer_with_txt()
    assert total_tokens
52+
53+
54+
def test_counttoken_localtokenizer_compute_with_txt() -> None:
    """The local-tokenizer compute sample returns truthy token info."""
    tokens_info = counttoken_localtokenizer_compute_with_txt.counttoken_localtokenizer_compute_with_txt()
    assert tokens_info

0 commit comments

Comments
 (0)