Commit d31e9a6

feat(client): add the ability to limit token usage
1 parent f14aa0f · commit d31e9a6

4 files changed: +60 −25 lines changed


README.md

Lines changed: 28 additions & 16 deletions
@@ -171,22 +171,28 @@ console.log(response);
 import { ChatGPTClient } from '@waylaidwanderer/chatgpt-api';
 
 const clientOptions = {
-    // (Optional) Support for a reverse proxy for the completions endpoint (private API server).
-    // Warning: This will expose your `openaiApiKey` to a third-party. Consider the risks before using this.
-    // reverseProxyUrl: 'https://chatgpt.hato.ai/completions',
-    // (Optional) Parameters as described in https://platform.openai.com/docs/api-reference/completions
-    modelOptions: {
-        // You can override the model name and any other parameters here.
-        // model: 'text-chat-davinci-002-20221122',
-    },
-    // (Optional) Set custom instructions instead of "You are ChatGPT...".
-    // promptPrefix: 'You are Bob, a cowboy in Western times...',
-    // (Optional) Set a custom name for the user
-    // userLabel: 'User',
-    // (Optional) Set a custom name for ChatGPT
-    // chatGptLabel: 'ChatGPT',
-    // (Optional) Set to true to enable `console.debug()` logging
-    debug: false,
+    // (Optional) Support for a reverse proxy for the completions endpoint (private API server).
+    // Warning: This will expose your `openaiApiKey` to a third-party. Consider the risks before using this.
+    // reverseProxyUrl: 'https://chatgpt.hato.ai/completions',
+    // (Optional) Parameters as described in https://platform.openai.com/docs/api-reference/completions
+    modelOptions: {
+        // You can override the model name and any other parameters here.
+        // model: 'text-chat-davinci-002-20221122',
+        // Set max_tokens here to override the default max_tokens of 1000 for the completion.
+        // max_tokens: 1000,
+    },
+    // (Optional) Davinci models have a max context length of 4097 tokens, but you may need to change this for other models.
+    // maxContextTokens: 4097,
+    // (Optional) You might want to lower this to save money if using a paid model like `text-davinci-003`.
+    // maxPromptTokens: 3097,
+    // (Optional) Set custom instructions instead of "You are ChatGPT...".
+    // promptPrefix: 'You are Bob, a cowboy in Western times...',
+    // (Optional) Set a custom name for the user
+    // userLabel: 'User',
+    // (Optional) Set a custom name for ChatGPT
+    // chatGptLabel: 'ChatGPT',
+    // (Optional) Set to true to enable `console.debug()` logging
+    debug: false,
 };
 
 const cacheOptions = {
@@ -238,7 +244,13 @@ module.exports = {
     modelOptions: {
         // You can override the model name and any other parameters here.
         // model: 'text-chat-davinci-002-20221122',
+        // Set max_tokens here to override the default max_tokens of 1000 for the completion.
+        // max_tokens: 1000,
     },
+    // (Optional) Davinci models have a max context length of 4097 tokens, but you may need to change this for other models.
+    // maxContextTokens: 4097,
+    // (Optional) You might want to lower this to save money if using a paid model like `text-davinci-003`.
+    // maxPromptTokens: 3097,
     // (Optional) Set custom instructions instead of "You are ChatGPT...".
     // promptPrefix: 'You are Bob, a cowboy in Western times...',
     // (Optional) Set a custom name for the user
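
These options fit together as a budget: maxPromptTokens plus the completion's max_tokens must not exceed maxContextTokens, and the client changes below enforce that at construction time. A minimal sketch of a cost-conscious configuration using only the options documented above (the specific numbers are illustrative assumptions, not recommendations):

import { ChatGPTClient } from '@waylaidwanderer/chatgpt-api';

const clientOptions = {
    modelOptions: {
        // Ask for a shorter completion to reduce cost per request.
        max_tokens: 500,
    },
    // Davinci models have a 4097-token context window.
    maxContextTokens: 4097,
    // 3597 prompt tokens + 500 response tokens = 4097, which fits within maxContextTokens.
    maxPromptTokens: 3597,
    debug: false,
};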

demos/use-client.js

Lines changed: 6 additions & 0 deletions
@@ -9,7 +9,13 @@ const clientOptions = {
     modelOptions: {
         // You can override the model name and any other parameters here.
         // model: 'text-chat-davinci-002-20221122',
+        // Set max_tokens here to override the default max_tokens of 1000 for the completion.
+        // max_tokens: 1000,
     },
+    // (Optional) Davinci models have a max context length of 4097 tokens, but you may need to change this for other models.
+    // maxContextTokens: 4097,
+    // (Optional) You might want to lower this to save money if using a paid model like `text-davinci-003`.
+    // maxPromptTokens: 3097,
     // (Optional) Set custom instructions instead of "You are ChatGPT...".
     // promptPrefix: 'You are Bob, a cowboy in Western times...',
     // (Optional) Set a custom name for the user

settings.example.js

Lines changed: 6 additions & 0 deletions
@@ -9,7 +9,13 @@ export default {
     modelOptions: {
         // You can override the model name and any other parameters here.
         // model: 'text-chat-davinci-002-20221122',
+        // Set max_tokens here to override the default max_tokens of 1000 for the completion.
+        // max_tokens: 1000,
     },
+    // (Optional) Davinci models have a max context length of 4097 tokens, but you may need to change this for other models.
+    // maxContextTokens: 4097,
+    // (Optional) You might want to lower this to save money if using a paid model like `text-davinci-003`.
+    // maxPromptTokens: 3097,
     // (Optional) Set custom instructions instead of "You are ChatGPT...".
     // promptPrefix: 'You are Bob, a cowboy in Western times...',
     // (Optional) Set a custom name for the user

src/ChatGPTClient.js

Lines changed: 20 additions & 9 deletions
@@ -26,6 +26,16 @@ export default class ChatGPTClient {
             stop: modelOptions.stop,
         };
 
+        // Davinci models have a max context length of 4097 tokens.
+        this.maxContextTokens = this.options.maxContextTokens || 4097;
+        // I decided to limit conversations to 3097 tokens, leaving 1000 tokens for the response.
+        this.maxPromptTokens = this.options.maxPromptTokens || 3097;
+        this.maxResponseTokens = this.modelOptions.max_tokens || 1000;
+
+        if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
+            throw new Error(`maxPromptTokens + max_tokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${this.maxPromptTokens + this.maxResponseTokens}) must be less than or equal to maxContextTokens (${this.maxContextTokens})`);
+        }
+
         this.userLabel = this.options.userLabel || 'User';
         this.chatGptLabel = this.options.chatGptLabel || 'ChatGPT';
 
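
The new guard fails fast when the two budgets cannot fit inside the context window. A minimal sketch of what that looks like from the caller's side, assuming the new ChatGPTClient(apiKey, clientOptions) construction documented in the README; the option values here are hypothetical:

import { ChatGPTClient } from '@waylaidwanderer/chatgpt-api';

// Hypothetical values: 3600 (maxPromptTokens) + 1000 (max_tokens) = 4600 > 4097, so construction throws.
const badOptions = {
    modelOptions: { max_tokens: 1000 },
    maxContextTokens: 4097,
    maxPromptTokens: 3600,
};

try {
    const client = new ChatGPTClient('OPENAI_API_KEY', badOptions);
} catch (error) {
    // "maxPromptTokens + max_tokens (3600 + 1000 = 4600) must be less than or equal to maxContextTokens (4097)"
    console.error(error.message);
}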

@@ -258,8 +268,7 @@ export default class ChatGPTClient {
 
         let currentTokenCount = this.getTokenCount(`${promptPrefix}${promptSuffix}`);
         let promptBody = '';
-        // I decided to limit conversations to 3097 tokens, leaving 1000 tokens for the response.
-        const maxTokenCount = 3097;
+        const maxTokenCount = this.maxPromptTokens;
         // Iterate backwards through the messages, adding them to the prompt until we reach the max token count.
         while (currentTokenCount < maxTokenCount && orderedMessages.length > 0) {
             const message = orderedMessages.pop();
@@ -280,11 +289,13 @@ export default class ChatGPTClient {
             // joined words may combine into a single token. Actually, that isn't really applicable here, but I can't
             // resist doing it the "proper" way.
             const newTokenCount = this.getTokenCount(`${promptPrefix}${newPromptBody}${promptSuffix}`);
-            // Always add the first (technically last) message, even if it puts us over the token limit.
-            // TODO: throw an error if the first message is over 3000 tokens
-            if (promptBody && newTokenCount > maxTokenCount) {
-                // This message would put us over the token limit, so don't add it.
-                break;
+            if (newTokenCount > maxTokenCount) {
+                if (promptBody) {
+                    // This message would put us over the token limit, so don't add it.
+                    break;
+                }
+                // This is the first message, so we can't add it. Just throw an error.
+                throw new Error(`Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`);
             }
             promptBody = newPromptBody;
             currentTokenCount = newTokenCount;
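
Before this change, the newest message was always included even if it alone exceeded the budget; now that case throws instead. A standalone sketch of the backwards, budget-bounded prompt assembly this hunk modifies, using a rough stand-in for the client's tokenizer-based getTokenCount (the real implementation counts GPT tokens, not characters):

// Rough stand-in for the client's tokenizer; an assumption for this sketch only.
const getTokenCount = (text) => Math.ceil(text.length / 4);

function buildPromptBody(orderedMessages, maxTokenCount) {
    let promptBody = '';
    let currentTokenCount = 0;
    // Iterate backwards through the messages (newest first), prepending each one,
    // until adding another message would exceed the prompt token budget.
    while (currentTokenCount < maxTokenCount && orderedMessages.length > 0) {
        const message = orderedMessages.pop();
        const newPromptBody = `${message}\n${promptBody}`;
        const newTokenCount = getTokenCount(newPromptBody);
        if (newTokenCount > maxTokenCount) {
            if (promptBody) {
                // An older message would put us over the limit; stop adding messages.
                break;
            }
            // Even the newest message alone exceeds the budget.
            throw new Error(`Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`);
        }
        promptBody = newPromptBody;
        currentTokenCount = newTokenCount;
    }
    return promptBody;
}

// Example: with a 3097-token budget, older messages are dropped before newer ones.
console.log(buildPromptBody(['oldest message', 'older message', 'newest message'], 3097));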
@@ -293,8 +304,8 @@ export default class ChatGPTClient {
         const prompt = `${promptBody}${promptSuffix}`;
 
         const numTokens = this.getTokenCount(prompt);
-        // Use up to 4097 tokens (prompt + response), but try to leave 1000 tokens for the response.
-        this.modelOptions.max_tokens = Math.min(4097 - numTokens, 1000);
+        // Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave `this.maxResponseTokens` tokens for the response.
+        this.modelOptions.max_tokens = Math.min(this.maxContextTokens - numTokens, this.maxResponseTokens);
 
         return prompt;
     }
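
Worked through with the defaults (maxContextTokens 4097, maxResponseTokens 1000): a finished prompt of 3,500 tokens leaves only 597 tokens of context, so max_tokens is clamped to 597; a 2,000-token prompt still gets the full 1,000-token response budget. The numbers below are illustrative:

// max_tokens = min(maxContextTokens - promptTokens, maxResponseTokens)
console.log(Math.min(4097 - 3500, 1000)); // 597  (long prompt: response budget is clamped)
console.log(Math.min(4097 - 2000, 1000)); // 1000 (short prompt: full response budget)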
