Commit 1c50663

VertexAI - Add CountTokens and ModalityTokenCount (#1216)
* VertexAI - Add CountTokens and ModalityTokenCount
* VertexAI - Improve error handling
1 parent 88cb401 commit 1c50663

7 files changed: +327 and -22 lines changed

vertexai/src/CountTokensResponse.cs

Lines changed: 51 additions & 0 deletions
@@ -14,13 +14,64 @@
  * limitations under the License.
  */
 
+using System.Collections.Generic;
+using System.Collections.ObjectModel;
+using Google.MiniJSON;
+using Firebase.VertexAI.Internal;
+
 namespace Firebase.VertexAI {
 
+/// <summary>
+/// The model's response to a count tokens request.
+/// </summary>
 public readonly struct CountTokensResponse {
+  /// <summary>
+  /// The total number of tokens in the input given to the model as a prompt.
+  /// </summary>
   public int TotalTokens { get; }
+  /// <summary>
+  /// The total number of billable characters in the text input given to the model as a prompt.
+  ///
+  /// > Important: This does not include billable image, video or other non-text input. See
+  /// [Vertex AI pricing](https://firebase.google.com/docs/vertex-ai/pricing) for details.
+  /// </summary>
   public int? TotalBillableCharacters { get; }
 
+  private readonly ReadOnlyCollection<ModalityTokenCount> _promptTokensDetails;
+  /// <summary>
+  /// The breakdown, by modality, of how many tokens are consumed by the prompt.
+  /// </summary>
+  public IEnumerable<ModalityTokenCount> PromptTokensDetails =>
+    _promptTokensDetails ?? new ReadOnlyCollection<ModalityTokenCount>(new List<ModalityTokenCount>());
+
   // Hidden constructor, users don't need to make this
+  private CountTokensResponse(int totalTokens,
+      int? totalBillableCharacters = null,
+      List<ModalityTokenCount> promptTokensDetails = null) {
+    TotalTokens = totalTokens;
+    TotalBillableCharacters = totalBillableCharacters;
+    _promptTokensDetails =
+        new ReadOnlyCollection<ModalityTokenCount>(promptTokensDetails ?? new List<ModalityTokenCount>());
+  }
+
+  /// <summary>
+  /// Intended for internal use only.
+  /// This method is used for deserializing JSON responses and should not be called directly.
+  /// </summary>
+  internal static CountTokensResponse FromJson(string jsonString) {
+    return FromJson(Json.Deserialize(jsonString) as Dictionary<string, object>);
+  }
+
+  /// <summary>
+  /// Intended for internal use only.
+  /// This method is used for deserializing JSON responses and should not be called directly.
+  /// </summary>
+  internal static CountTokensResponse FromJson(Dictionary<string, object> jsonDict) {
+    return new CountTokensResponse(
+        jsonDict.ParseValue<int>("totalTokens"),
+        jsonDict.ParseNullableValue<int>("totalBillableCharacters"),
+        jsonDict.ParseObjectList("promptTokensDetails", ModalityTokenCount.FromJson));
+  }
 }
 
 }
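
For orientation, here is a minimal usage sketch of the new response type. It assumes a GenerativeModel instance ("model") obtained elsewhere in the SDK (model construction is not part of this diff) and runs inside an async method.

// Illustrative only: reading the fields of the new CountTokensResponse.
// The "model" instance and its construction are assumptions, not part of this commit.
CountTokensResponse countResponse = await model.CountTokensAsync("Why is the sky blue?");

UnityEngine.Debug.Log($"Total tokens: {countResponse.TotalTokens}");
if (countResponse.TotalBillableCharacters.HasValue) {
  UnityEngine.Debug.Log($"Billable characters: {countResponse.TotalBillableCharacters.Value}");
}
foreach (ModalityTokenCount detail in countResponse.PromptTokensDetails) {
  UnityEngine.Debug.Log($"{detail.Modality}: {detail.TokenCount} tokens");
}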

vertexai/src/GenerateContentResponse.cs

Lines changed: 17 additions & 3 deletions
@@ -189,13 +189,25 @@ public readonly struct UsageMetadata {
   /// </summary>
   public int TotalTokenCount { get; }
 
-  // TODO: New fields about ModalityTokenCount
+  private readonly ReadOnlyCollection<ModalityTokenCount> _promptTokensDetails;
+  public IEnumerable<ModalityTokenCount> PromptTokensDetails =>
+    _promptTokensDetails ?? new ReadOnlyCollection<ModalityTokenCount>(new List<ModalityTokenCount>());
+
+  private readonly ReadOnlyCollection<ModalityTokenCount> _candidatesTokensDetails;
+  public IEnumerable<ModalityTokenCount> CandidatesTokensDetails =>
+    _candidatesTokensDetails ?? new ReadOnlyCollection<ModalityTokenCount>(new List<ModalityTokenCount>());
 
   // Hidden constructor, users don't need to make this.
-  private UsageMetadata(int promptTC, int candidatesTC, int totalTC) {
+  private UsageMetadata(int promptTC, int candidatesTC, int totalTC,
+      List<ModalityTokenCount> promptDetails, List<ModalityTokenCount> candidateDetails) {
     PromptTokenCount = promptTC;
     CandidatesTokenCount = candidatesTC;
     TotalTokenCount = totalTC;
+    _promptTokensDetails =
+        new ReadOnlyCollection<ModalityTokenCount>(promptDetails ?? new List<ModalityTokenCount>());
+    _candidatesTokensDetails =
+        new ReadOnlyCollection<ModalityTokenCount>(candidateDetails ?? new List<ModalityTokenCount>());
+
   }
 
   /// <summary>
@@ -206,7 +218,9 @@ internal static UsageMetadata FromJson(Dictionary<string, object> jsonDict) {
     return new UsageMetadata(
         jsonDict.ParseValue<int>("promptTokenCount"),
         jsonDict.ParseValue<int>("candidatesTokenCount"),
-        jsonDict.ParseValue<int>("totalTokenCount"));
+        jsonDict.ParseValue<int>("totalTokenCount"),
+        jsonDict.ParseObjectList("promptTokensDetails", ModalityTokenCount.FromJson),
+        jsonDict.ParseObjectList("candidatesTokensDetails", ModalityTokenCount.FromJson));
   }
 }
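
A short sketch of how the new per-modality breakdowns might be read after a generation call. It assumes GenerateContentResponse exposes this struct through a nullable UsageMetadata property; that property is not shown in this diff, and the "model" instance is the same assumed instance as in the earlier example.

// Illustrative only: the UsageMetadata property on the response is assumed here.
GenerateContentResponse result = await model.GenerateContentAsync("Describe the weather.");
if (result.UsageMetadata is UsageMetadata usage) {
  UnityEngine.Debug.Log($"Prompt tokens: {usage.PromptTokenCount}, total: {usage.TotalTokenCount}");
  foreach (ModalityTokenCount m in usage.PromptTokensDetails) {
    UnityEngine.Debug.Log($"Prompt {m.Modality}: {m.TokenCount}");
  }
  foreach (ModalityTokenCount m in usage.CandidatesTokensDetails) {
    UnityEngine.Debug.Log($"Candidates {m.Modality}: {m.TokenCount}");
  }
}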

vertexai/src/GenerativeModel.cs

Lines changed: 90 additions & 18 deletions
@@ -91,7 +91,7 @@ public Task<GenerateContentResponse> GenerateContentAsync(
   /// <summary>
   /// Generates new content from input text given to the model as a prompt.
   /// </summary>
-  /// <param name="content">The text given to the model as a prompt.</param>
+  /// <param name="text">The text given to the model as a prompt.</param>
   /// <returns>The generated content response from the model.</returns>
   /// <exception cref="VertexAIException">Thrown when an error occurs during content generation.</exception>
   public Task<GenerateContentResponse> GenerateContentAsync(
@@ -122,7 +122,7 @@ public IAsyncEnumerable<GenerateContentResponse> GenerateContentStreamAsync(
   /// <summary>
   /// Generates new content as a stream from input text given to the model as a prompt.
   /// </summary>
-  /// <param name="content">The text given to the model as a prompt.</param>
+  /// <param name="text">The text given to the model as a prompt.</param>
   /// <returns>A stream of generated content responses from the model.</returns>
   /// <exception cref="VertexAIException">Thrown when an error occurs during content generation.</exception>
   public IAsyncEnumerable<GenerateContentResponse> GenerateContentStreamAsync(
@@ -140,14 +140,32 @@ public IAsyncEnumerable<GenerateContentResponse> GenerateContentStreamAsync(
     return GenerateContentStreamAsyncInternal(content);
   }
 
+  /// <summary>
+  /// Counts the number of tokens in a prompt using the model's tokenizer.
+  /// </summary>
+  /// <param name="content">The input(s) given to the model as a prompt.</param>
+  /// <returns>The `CountTokensResponse` of running the model's tokenizer on the input.</returns>
+  /// <exception cref="VertexAIException">Thrown when an error occurs during the request.</exception>
   public Task<CountTokensResponse> CountTokensAsync(
       params ModelContent[] content) {
     return CountTokensAsync((IEnumerable<ModelContent>)content);
   }
+  /// <summary>
+  /// Counts the number of tokens in a prompt using the model's tokenizer.
+  /// </summary>
+  /// <param name="text">The text input given to the model as a prompt.</param>
+  /// <returns>The `CountTokensResponse` of running the model's tokenizer on the input.</returns>
+  /// <exception cref="VertexAIException">Thrown when an error occurs during the request.</exception>
   public Task<CountTokensResponse> CountTokensAsync(
       string text) {
     return CountTokensAsync(new ModelContent[] { ModelContent.Text(text) });
   }
+  /// <summary>
+  /// Counts the number of tokens in a prompt using the model's tokenizer.
+  /// </summary>
+  /// <param name="content">The input(s) given to the model as a prompt.</param>
+  /// <returns>The `CountTokensResponse` of running the model's tokenizer on the input.</returns>
+  /// <exception cref="VertexAIException">Thrown when an error occurs during the request.</exception>
   public Task<CountTokensResponse> CountTokensAsync(
       IEnumerable<ModelContent> content) {
     return CountTokensAsyncInternal(content);
@@ -184,12 +202,16 @@ private async Task<GenerateContentResponse> GenerateContentAsyncInternal(
     UnityEngine.Debug.Log("Request:\n" + bodyJson);
 #endif
 
-    HttpResponseMessage response = await _httpClient.SendAsync(request);
-    // TODO: Convert any timeout exception into a VertexAI equivalent
-    // TODO: Convert any HttpRequestExceptions, see:
-    // https://learn.microsoft.com/en-us/dotnet/api/system.net.http.httpclient.sendasync?view=net-9.0
-    // https://learn.microsoft.com/en-us/dotnet/api/system.net.http.httpresponsemessage.ensuresuccessstatuscode?view=net-9.0
-    response.EnsureSuccessStatusCode();
+    HttpResponseMessage response;
+    try {
+      response = await _httpClient.SendAsync(request);
+      response.EnsureSuccessStatusCode();
+    } catch (TaskCanceledException e) when (e.InnerException is TimeoutException) {
+      throw new VertexAIRequestTimeoutException("Request timed out.", e);
+    } catch (HttpRequestException e) {
+      // TODO: Convert to a more precise exception when possible.
+      throw new VertexAIException("HTTP request failed.", e);
+    }
 
     string result = await response.Content.ReadAsStringAsync();
 
@@ -215,13 +237,16 @@ private async IAsyncEnumerable<GenerateContentResponse> GenerateContentStreamAsy
     UnityEngine.Debug.Log("Request:\n" + bodyJson);
 #endif
 
-    HttpResponseMessage response =
-        await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
-    // TODO: Convert any timeout exception into a VertexAI equivalent
-    // TODO: Convert any HttpRequestExceptions, see:
-    // https://learn.microsoft.com/en-us/dotnet/api/system.net.http.httpclient.sendasync?view=net-9.0
-    // https://learn.microsoft.com/en-us/dotnet/api/system.net.http.httpresponsemessage.ensuresuccessstatuscode?view=net-9.0
-    response.EnsureSuccessStatusCode();
+    HttpResponseMessage response;
+    try {
+      response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
+      response.EnsureSuccessStatusCode();
+    } catch (TaskCanceledException e) when (e.InnerException is TimeoutException) {
+      throw new VertexAIRequestTimeoutException("Request timed out.", e);
+    } catch (HttpRequestException e) {
+      // TODO: Convert to a more precise exception when possible.
+      throw new VertexAIException("HTTP request failed.", e);
+    }
 
     // We are expecting a Stream as the response, so handle that.
     using var stream = await response.Content.ReadAsStreamAsync();
@@ -242,9 +267,37 @@ private async IAsyncEnumerable<GenerateContentResponse> GenerateContentStreamAsy
 
   private async Task<CountTokensResponse> CountTokensAsyncInternal(
       IEnumerable<ModelContent> content) {
-    // TODO: Implementation
-    await Task.CompletedTask;
-    throw new NotImplementedException();
+    HttpRequestMessage request = new(HttpMethod.Post, GetURL() + ":countTokens");
+
+    // Set the request headers
+    SetRequestHeaders(request);
+
+    // Set the content
+    string bodyJson = MakeCountTokensRequest(content);
+    request.Content = new StringContent(bodyJson, Encoding.UTF8, "application/json");
+
+#if FIREBASE_LOG_REST_CALLS
+    UnityEngine.Debug.Log("CountTokensRequest:\n" + bodyJson);
+#endif
+
+    HttpResponseMessage response;
+    try {
+      response = await _httpClient.SendAsync(request);
+      response.EnsureSuccessStatusCode();
+    } catch (TaskCanceledException e) when (e.InnerException is TimeoutException) {
+      throw new VertexAIRequestTimeoutException("Request timed out.", e);
+    } catch (HttpRequestException e) {
+      // TODO: Convert to a more precise exception when possible.
+      throw new VertexAIException("HTTP request failed.", e);
+    }
+
+    string result = await response.Content.ReadAsStringAsync();
+
+#if FIREBASE_LOG_REST_CALLS
+    UnityEngine.Debug.Log("CountTokensResponse:\n" + result);
+#endif
+
+    return CountTokensResponse.FromJson(result);
   }
 
   private string GetURL() {
@@ -283,6 +336,25 @@ private string ModelContentsToJson(IEnumerable<ModelContent> contents) {
 
     return Json.Serialize(jsonDict);
   }
+
+  // CountTokensRequest is a subset of the full info needed for GenerateContent
+  private string MakeCountTokensRequest(IEnumerable<ModelContent> contents) {
+    Dictionary<string, object> jsonDict = new() {
+      // Convert the Contents into a list of Json dictionaries
+      ["contents"] = contents.Select(c => c.ToJson()).ToList()
+    };
+    if (_generationConfig.HasValue) {
+      jsonDict["generationConfig"] = _generationConfig?.ToJson();
+    }
+    if (_tools != null && _tools.Length > 0) {
+      jsonDict["tools"] = _tools.Select(t => t.ToJson()).ToList();
+    }
+    if (_systemInstruction.HasValue) {
+      jsonDict["systemInstruction"] = _systemInstruction?.ToJson();
+    }
+
+    return Json.Serialize(jsonDict);
+  }
 }
 
 }
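
The three CountTokensAsync overloads added above can be called as in the following sketch; the "model" instance is assumed to come from the SDK's existing entry point, and the surrounding async context is omitted.

// Illustrative only: raw text, params ModelContent[], and IEnumerable<ModelContent> overloads.
CountTokensResponse fromText = await model.CountTokensAsync("How many tokens is this?");

CountTokensResponse fromParts = await model.CountTokensAsync(
    ModelContent.Text("You are a concise assistant."),
    ModelContent.Text("Summarize the rules of chess."));

IEnumerable<ModelContent> history = new List<ModelContent> {
  ModelContent.Text("First turn."),
  ModelContent.Text("Second turn."),
};
CountTokensResponse fromHistory = await model.CountTokensAsync(history);

UnityEngine.Debug.Log($"Text prompt uses {fromText.TotalTokens} tokens.");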

vertexai/src/ModalityTokenCount.cs

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Firebase.VertexAI.Internal;
+
+namespace Firebase.VertexAI {
+
+/// <summary>
+/// Content part modality.
+/// </summary>
+public enum ContentModality {
+  /// <summary>
+  /// A new and not yet supported value.
+  /// </summary>
+  Unknown = 0,
+  /// <summary>
+  /// Plain text.
+  /// </summary>
+  Text,
+  /// <summary>
+  /// Image.
+  /// </summary>
+  Image,
+  /// <summary>
+  /// Video.
+  /// </summary>
+  Video,
+  /// <summary>
+  /// Audio.
+  /// </summary>
+  Audio,
+  /// <summary>
+  /// Document, e.g. PDF.
+  /// </summary>
+  Document,
+}
+
+/// <summary>
+/// Represents token counting info for a single modality.
+/// </summary>
+public readonly struct ModalityTokenCount {
+  /// <summary>
+  /// The modality associated with this token count.
+  /// </summary>
+  public ContentModality Modality { get; }
+  /// <summary>
+  /// The number of tokens counted.
+  /// </summary>
+  public int TokenCount { get; }
+
+  // Hidden constructor, users don't need to make this
+  private ModalityTokenCount(ContentModality modality, int tokenCount) {
+    Modality = modality;
+    TokenCount = tokenCount;
+  }
+
+  private static ContentModality ParseModality(string str) {
+    return str switch {
+      "TEXT" => ContentModality.Text,
+      "IMAGE" => ContentModality.Image,
+      "VIDEO" => ContentModality.Video,
+      "AUDIO" => ContentModality.Audio,
+      "DOCUMENT" => ContentModality.Document,
+      _ => ContentModality.Unknown,
+    };
+  }
+
+  /// <summary>
+  /// Intended for internal use only.
+  /// This method is used for deserializing JSON responses and should not be called directly.
+  /// </summary>
+  internal static ModalityTokenCount FromJson(Dictionary<string, object> jsonDict) {
+    return new ModalityTokenCount(
+        jsonDict.ParseEnum("modality", ParseModality),
+        jsonDict.ParseValue<int>("tokenCount"));
+  }
+}
+
+}
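
As a usage sketch, the per-modality entries can be folded into a simple per-modality total; the helper below is illustrative only and not part of this commit.

using System.Collections.Generic;
using System.Linq;
using Firebase.VertexAI;

// Illustrative helper, not part of the SDK.
public static class TokenCountExtensions {
  // Collapse a sequence of ModalityTokenCount entries into a total per modality.
  public static Dictionary<ContentModality, int> TotalsByModality(
      this IEnumerable<ModalityTokenCount> details) {
    return details
        .GroupBy(d => d.Modality)
        .ToDictionary(g => g.Key, g => g.Sum(d => d.TokenCount));
  }
}

With that helper, countResponse.PromptTokensDetails.TotalsByModality() reports, for example, how many prompt tokens came from ContentModality.Image versus ContentModality.Text.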

vertexai/src/ModalityTokenCount.cs.meta

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default.

vertexai/src/VertexAIException.cs

Lines changed: 3 additions & 1 deletion
@@ -19,7 +19,7 @@
 
 namespace Firebase.VertexAI {
 
-public abstract class VertexAIException : Exception {
+public class VertexAIException : Exception {
   internal VertexAIException(string message) : base(message) { }
 
   internal VertexAIException(string message, Exception exception) : base(message, exception) { }
@@ -62,6 +62,8 @@ internal VertexAIResponseStoppedException(GenerateContentResponse response) :
 
 public class VertexAIRequestTimeoutException : VertexAIException {
   internal VertexAIRequestTimeoutException(string message) : base(message) { }
+
+  internal VertexAIRequestTimeoutException(string message, Exception e) : base(message, e) { }
 }
 
 public class VertexAIInvalidLocationException : VertexAIException {
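
Because VertexAIException is no longer abstract and remains the base of VertexAIRequestTimeoutException, callers can handle the timeout case specifically and fall back to the base type; a sketch, reusing the assumed "model" instance from the earlier examples.

// Illustrative only: handling the timeout path surfaced by this commit.
try {
  CountTokensResponse response = await model.CountTokensAsync("ping");
  UnityEngine.Debug.Log($"Tokens: {response.TotalTokens}");
} catch (VertexAIRequestTimeoutException e) {
  UnityEngine.Debug.LogWarning($"Count tokens timed out: {e.Message}");
} catch (VertexAIException e) {
  UnityEngine.Debug.LogError($"Count tokens failed: {e.Message}");
}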
