feat(genai): Add local tokenizer samples for Count and Compute (#13602)

msampathkumar · web-flow · commit e6d1b6c547ee · 2025-10-16T08:27:23.000-04:00
* feat(genai): Add local tokenizer samples for Count and Compute

* feat(genai): Add local tokenizer samples for Count and Compute
diff --git a/genai/count_tokens/counttoken_localtokenizer_compute_with_txt.py b/genai/count_tokens/counttoken_localtokenizer_compute_with_txt.py
@@ -0,0 +1,45 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def counttoken_localtokenizer_compute_with_txt() -> list:
+    """Tokenize a fixed text prompt with the local tokenizer.
+
+    Prints the ``compute_tokens`` response and returns its ``tokens_info``
+    attribute, which the example output below shows as a list of
+    ``TokensInfo`` entries (role, token ids and token byte strings).
+
+    Returns:
+        The ``tokens_info`` value of the response (a list, not an int).
+    """
+    # [START googlegenaisdk_counttoken_localtokenizer_compute_with_txt]
+    from google.genai.local_tokenizer import LocalTokenizer
+
+    tokenizer = LocalTokenizer(model_name="gemini-2.5-flash")
+    response = tokenizer.compute_tokens("What's the longest word in the English language?")
+    print(response)
+    # Example output:
+    # tokens_info=[TokensInfo(
+    #     role='user',
+    #     token_ids=[3689, 236789, 236751, 506,
+    #               27801, 3658, 528, 506, 5422, 5192, 236881],
+    #     tokens=[b'What', b"'", b's', b' the', b' longest',
+    #            b' word', b' in', b' the', b' English', b' language', b'?']
+    #     )]
+    # [END googlegenaisdk_counttoken_localtokenizer_compute_with_txt]
+    return response.tokens_info
+
+
+if __name__ == "__main__":
+    counttoken_localtokenizer_compute_with_txt()
diff --git a/genai/count_tokens/counttoken_localtokenizer_with_txt.py b/genai/count_tokens/counttoken_localtokenizer_with_txt.py
@@ -0,0 +1,38 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def counttoken_localtokenizer_with_txt() -> int:
+    """Count the tokens of a fixed text prompt with the local tokenizer.
+
+    Prints the ``count_tokens`` response for inspection.
+
+    Returns:
+        The ``total_tokens`` value of the response.
+    """
+    # [START googlegenaisdk_counttoken_localtokenizer_with_txt]
+    from google.genai.local_tokenizer import LocalTokenizer
+
+    prompt = "What's the highest mountain in Africa?"
+    local_tokenizer = LocalTokenizer(model_name="gemini-2.5-flash")
+    token_count = local_tokenizer.count_tokens(prompt)
+    print(token_count)
+    # Example output:
+    #   total_tokens=10
+    # [END googlegenaisdk_counttoken_localtokenizer_with_txt]
+    return token_count.total_tokens
+
+
+if __name__ == "__main__":
+    counttoken_localtokenizer_with_txt()
diff --git a/genai/count_tokens/counttoken_with_txt.py b/genai/count_tokens/counttoken_with_txt.py
@@ -25,7 +25,7 @@ def count_tokens() -> int:
     )
     print(response)
     # Example output:
-    # total_tokens=10
+    # total_tokens=9
     # cached_content_token_count=None
     # [END googlegenaisdk_counttoken_with_txt]
     return response.total_tokens
diff --git a/genai/count_tokens/requirements.txt b/genai/count_tokens/requirements.txt
@@ -1 +1,2 @@
 google-genai==1.42.0
+sentencepiece==0.2.1
diff --git a/genai/count_tokens/test_count_tokens_examples.py b/genai/count_tokens/test_count_tokens_examples.py
@@ -19,6 +19,8 @@
 import os
 
 import counttoken_compute_with_txt
+import counttoken_localtokenizer_compute_with_txt
+import counttoken_localtokenizer_with_txt
 import counttoken_resp_with_txt
 import counttoken_with_txt
 import counttoken_with_txt_vid
@@ -43,3 +45,17 @@ def test_counttoken_with_txt() -> None:
 
 def test_counttoken_with_txt_vid() -> None:
     assert counttoken_with_txt_vid.count_tokens()
+
+
+def test_counttoken_localtokenizer_with_txt() -> None:
+    """The local count sample must return a truthy token total."""
+    total_tokens = counttoken_localtokenizer_with_txt.counttoken_localtokenizer_with_txt()
+    assert total_tokens
+
+
+def test_counttoken_localtokenizer_compute_with_txt() -> None:
+    """The local compute sample must return truthy token details."""
+    tokens_info = (
+        counttoken_localtokenizer_compute_with_txt.counttoken_localtokenizer_compute_with_txt()
+    )
+    assert tokens_info

Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@ def count_tokens() -> int:`
`25`	`25`	`)`
`26`	`26`	`print(response)`
`27`	`27`	`# Example output:`
`28`		`- # total_tokens=10`
	`28`	`+ # total_tokens=9`
`29`	`29`	`# cached_content_token_count=None`
`30`	`30`	`# [END googlegenaisdk_counttoken_with_txt]`
`31`	`31`	`return response.total_tokens`
Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`	`1`	`google-genai==1.42.0`
	`2`	`+sentencepiece==0.2.1`