Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions generative_ai/count_token/compute_tokens_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


def compute_tokens_example() -> int:
    """Demonstrate local token counting and computation for a Gemini model.

    Uses the local tokenizer (no API call) to count the tokens in a fixed
    prompt and then compute the detailed per-token breakdown.

    Returns:
        The number of token-info entries computed for the prompt.
    """
    # [START generativeaionvertexai_compute_tokens]
    from vertexai.preview.tokenization import get_tokenizer_for_model

    # Initialize a local tokenizer for the target model.
    local_tokenizer = get_tokenizer_for_model("gemini-1.5-flash")

    prompt = "why is the sky blue?"

    # Count the tokens in the prompt.
    count_result = local_tokenizer.count_tokens(prompt)
    print(f"Tokens count: {count_result.total_tokens}")
    # Example response:
    # Tokens count: 6

    # Compute the detailed token breakdown for the same prompt.
    compute_result = local_tokenizer.compute_tokens(prompt)
    print(f"Tokens list: {compute_result.tokens_info}")
    # Example response:
    # Tokens list: [TokensInfo(token_ids=[18177, 603, 573, 8203, 3868, 235336],
    # tokens=[b'why', b' is', b' the', b' sky', b' blue', b'?'], role='user')]
    # [END generativeaionvertexai_compute_tokens]
    return len(compute_result.tokens_info)


if __name__ == "__main__":
    # Allow running this sample as a standalone script.
    compute_tokens_example()
37 changes: 35 additions & 2 deletions generative_ai/count_token/count_token_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,48 @@
def count_token_locally() -> int:
    """Count tokens with the local Gemini tokenizer (no service calls).

    Demonstrates counting for plain text, for multiple prompt parts with a
    system instruction, and for a prompt accompanied by a function-calling
    tool.

    Returns:
        The token count for the prompt combined with the system instruction.
    """
    # [START generativeaionvertexai_token_count_sample_with_local_sdk]
    from vertexai.preview.tokenization import get_tokenizer_for_model
    from vertexai.generative_models import FunctionDeclaration, Tool

    # Initialize a local tokenizer; everything below runs offline.
    local_tokenizer = get_tokenizer_for_model("gemini-1.5-flash")

    # Plain text prompt.
    prompt = "hello world"
    result = local_tokenizer.count_tokens(prompt)
    print(f"Prompt Token Count: {result.total_tokens}")
    # Example response:
    # Prompt Token Count: 2

    # Multiple prompt parts together with a system instruction.
    prompt = ["hello world", "what's the weather today"]
    result = local_tokenizer.count_tokens(prompt, system_instruction="you are a chatbot")
    print(f"Prompt Token Count: {result.total_tokens}")
    # Example response:
    # Prompt Token Count: 12

    # Count tokens for a prompt that carries a function declaration.
    def get_current_weather(location: str, unit: str = "centigrade") -> dict:
        """Gets weather in the specified location.
        Args:
            location: The location for which to get the weather.
            unit: Optional. Temperature unit. Can be Centigrade or Fahrenheit. Defaults to Centigrade.
        Returns:
            The weather information as a dict.
        """
        return dict(
            location="us-central1",
            unit=unit,
            weather="Super nice, but maybe a bit hot.",
        )

    weather_tool = Tool(
        function_declarations=[FunctionDeclaration.from_func(get_current_weather)]
    )
    print(local_tokenizer.count_tokens("hello", tools=[weather_tool]))
    # Example response:
    # CountTokensResult(total_tokens=49)
    # [END generativeaionvertexai_token_count_sample_with_local_sdk]
    return result.total_tokens


# TODO: Delete the following samples after API deprecation. `count_token_locally` is faster & recommended.
def count_token_service() -> int:
# [START generativeaionvertexai_token_count_sample_with_genai]
import vertexai
Expand All @@ -50,11 +77,17 @@ def count_token_service() -> int:
response = model.count_tokens(prompt)
print(f"Prompt Token Count: {response.total_tokens}")
print(f"Prompt Character Count: {response.total_billable_characters}")
# Example response:
# Prompt Token Count: 2
# Prompt Token Count: 10

prompt = ["hello world", "what's the weather today"]
response = model.count_tokens(prompt)
print(f"Prompt Token Count: {response.total_tokens}")
print(f"Prompt Character Count: {response.total_billable_characters}")
# Example response:
# Prompt Token Count: 8
# Prompt Token Count: 31
# [END generativeaionvertexai_token_count_sample_with_genai]
return response.total_tokens

Expand Down
5 changes: 5 additions & 0 deletions generative_ai/count_token/count_token_example_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
# limitations under the License.


import compute_tokens_example
import count_token_example


def test_count_token() -> None:
    """Smoke-test both token-counting samples; each must report a nonzero count."""
    local_total = count_token_example.count_token_locally()
    assert local_total
    service_total = count_token_example.count_token_service()
    assert service_total


def test_compute_token() -> None:
    """Smoke-test the compute-tokens sample; it must yield at least one token."""
    token_count = compute_tokens_example.compute_tokens_example()
    assert token_count
Loading