@@ -131,6 +131,41 @@ public List<SafetyRating> safetyRatings(UUID id) {
131131 .toList ();
132132 }
133133
134+
135+ /**
136+ * Runs a model's tokenizer on input content and returns the token count.
137+ * When using long prompts, it might be useful to count tokens before sending any content to the model.
138+ *
139+ * @param model to be analyzed
140+ * @return the token count
141+ */
142+ public CompletableFuture <Long > countTokens (GenerativeModel model ) {
143+ return execute (() -> {
144+ CompletableFuture <HttpResponse <String >> response = client .sendAsync (
145+ HttpRequest .newBuilder ()
146+ .POST (HttpRequest .BodyPublishers .ofString (
147+ jsonParser .toJson (new CountTokenRequest (convert (model )))
148+ ))
149+ .uri (URI .create ("%s/%s:countTokens?key=%s" .formatted (urlPrefix , model .modelName (), apiKey )))
150+ .build (),
151+ HttpResponse .BodyHandlers .ofString ()
152+ );
153+ return response
154+ .thenApply (HttpResponse ::body )
155+ .thenApply (body -> {
156+ try {
157+ var ctr = jsonParser .fromJson (body , CountTokenResponse .class );
158+ if (ctr .totalTokens () == null ) {
159+ throw new RuntimeException ("No token field in response" );
160+ }
161+ return ctr .totalTokens ();
162+ } catch (Exception e ) {
163+ throw new RuntimeException ("Unexpected body:\n " + body , e );
164+ }
165+ });
166+ });
167+ }
168+
134169 /**
135170 * Generates a response from Gemini API based on the given {@code model}. The response is streamed in chunks of text. The
136171 * stream items are delivered as they arrive.
@@ -210,7 +245,7 @@ public CompletableFuture<GeneratedContent> generateContent(GenerativeModel model
210245 private static GenerateContentRequest convert (GenerativeModel model ) {
211246 List <GenerationContent > generationContents = model .contents ().stream ()
212247 .map (content -> {
213- // todo change to "switch" over sealed type with jdk 21
                // TODO(jdk21): replace this instanceof chain with an exhaustive switch over the sealed type
214249 if (content instanceof Content .TextContent textContent ) {
215250 return new GenerationContent (
216251 textContent .role (),
@@ -259,7 +294,7 @@ private static GenerateContentRequest convert(GenerativeModel model) {
259294 }
260295 })
261296 .toList ();
262- return new GenerateContentRequest (generationContents , model .safetySettings (), model .generationConfig ());
297+ return new GenerateContentRequest (model . modelName (), generationContents , model .safetySettings (), model .generationConfig ());
263298 }
264299
265300 private <T > T execute (ThrowingSupplier <T > supplier ) {
@@ -344,6 +379,16 @@ public record TypedSafetyRating(
344379
345380 }
346381
    /**
     * Request body for the {@code :countTokens} endpoint: wraps the full
     * generate-content request whose contents are to be tokenized.
     * The component name doubles as the JSON field name when serialized
     * via {@code jsonParser}, so it must not be renamed.
     */
    private record CountTokenRequest(
            GenerateContentRequest generateContentRequest
    ) {
    }
386+
    /**
     * Response body of the {@code :countTokens} endpoint.
     * {@code totalTokens} is boxed ({@code Long}) because the field may be
     * absent from the JSON payload — callers check it for {@code null}.
     */
    private record CountTokenResponse(
            Long totalTokens
    ) {
    }
391+
347392 private record GenerateContentResponse (
348393 UsageMetadata usageMetadata ,
349394 List <ResponseCandidate > candidates
@@ -359,6 +404,9 @@ private record ResponseCandidate(
359404 }
360405
361406 private record GenerateContentRequest (
        // The "model" field is required by the countTokens endpoint but not by the others.
        // Since the other endpoints accept it without complaint, we include it in every request for now.
409+ String model ,
362410 List <GenerationContent > contents ,
363411 List <SafetySetting > safetySettings ,
364412 GenerationConfig generationConfig
0 commit comments