Commit 7a1d296

🤖 feat: Gemini 3.1 Pricing and Context Window (#11884)
- Added support for the new Gemini 3.1 models, including 'gemini-3.1-pro-preview' and 'gemini-3.1-pro-preview-customtools'.
- Updated pricing logic to apply standard and premium rates based on token usage thresholds for the new models.
- Enhanced tests to validate pricing behavior for both standard and premium scenarios.
- Modified configuration files to include Gemini 3.1 models in the default model lists and token value mappings.
- Updated environment example file to reflect the new model options.
1 parent a103ce7 commit 7a1d296
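
The pricing change amounts to a per-model threshold switch: at or below 200k prompt tokens the standard 'gemini-3.1' rates apply, and above that the premium rates do (the rate tables are added in api/models/tx.js below). The selection helper itself is not part of this diff, so the following is only a minimal sketch of the intended behavior, using the rates from this commit and a hypothetical pickGemini31Rates helper.

// Rates added in this commit (see api/models/tx.js); values are per 1M tokens.
const tokenValues = { 'gemini-3.1': { prompt: 2, completion: 12 } };
const premiumTokenValues = { 'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 } };

// Hypothetical helper: choose standard vs. premium rates from the prompt token count.
// The comparison is strictly greater-than, matching the "at exactly the threshold" test,
// which still expects standard pricing.
function pickGemini31Rates(promptTokens) {
  const premium = premiumTokenValues['gemini-3.1'];
  return promptTokens > premium.threshold ? premium : tokenValues['gemini-3.1'];
}

console.log(pickGemini31Rates(100000)); // { prompt: 2, completion: 12 }  (standard)
console.log(pickGemini31Rates(250000)); // { threshold: 200000, prompt: 4, completion: 18 }  (premium)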

File tree

8 files changed: +450 -2 lines changed

.env.example

Lines changed: 2 additions & 2 deletions
@@ -193,10 +193,10 @@ GOOGLE_KEY=user_provided
 # GOOGLE_AUTH_HEADER=true

 # Gemini API (AI Studio)
-# GOOGLE_MODELS=gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
+# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite

 # Vertex AI
-# GOOGLE_MODELS=gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
+# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001

 # GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001

api/models/Transaction.spec.js

Lines changed: 133 additions & 0 deletions
@@ -823,6 +823,139 @@ describe('Premium Token Pricing Integration Tests', () => {
     expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
   });

+  test('spendTokens should apply standard pricing for gemini-3.1-pro-preview below threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 100000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-below',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = tokenValues['gemini-3.1'].prompt;
+    const standardCompletionRate = tokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  test('spendTokens should apply premium pricing for gemini-3.1-pro-preview above threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-above',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+    const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * premiumPromptRate + completionTokens * premiumCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  test('spendTokens should apply standard pricing for gemini-3.1-pro-preview at exactly the threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = premiumTokenValues['gemini-3.1'].threshold;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-exact',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = tokenValues['gemini-3.1'].prompt;
+    const standardCompletionRate = tokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  test('spendStructuredTokens should apply premium pricing for gemini-3.1 when total input exceeds threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-structured-premium',
+      model,
+      context: 'message',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    const tokenUsage = {
+      promptTokens: {
+        input: 200000,
+        write: 10000,
+        read: 5000,
+      },
+      completionTokens: 1000,
+    };
+
+    const totalInput =
+      tokenUsage.promptTokens.input + tokenUsage.promptTokens.write + tokenUsage.promptTokens.read;
+
+    await spendStructuredTokens(txData, tokenUsage);
+
+    const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+    const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+    const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
+    const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
+
+    const expectedPromptCost =
+      tokenUsage.promptTokens.input * premiumPromptRate +
+      tokenUsage.promptTokens.write * writeMultiplier +
+      tokenUsage.promptTokens.read * readMultiplier;
+    const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
+    const expectedTotalCost = expectedPromptCost + expectedCompletionCost;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(totalInput).toBeGreaterThan(premiumTokenValues['gemini-3.1'].threshold);
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
+  });
+
   test('non-premium models should not be affected by inputTokenCount regardless of prompt size', async () => {
     const userId = new mongoose.Types.ObjectId();
     const initialBalance = 100000000;

api/models/spendTokens.spec.js

Lines changed: 129 additions & 0 deletions
@@ -878,6 +878,135 @@ describe('spendTokens', () => {
     expect(result.completion.completion).toBeCloseTo(-expectedCompletionCost, 0);
   });

+  it('should charge standard rates for gemini-3.1-pro-preview when prompt tokens are below threshold', async () => {
+    const initialBalance = 100000000;
+    await Balance.create({
+      user: userId,
+      tokenCredits: initialBalance,
+    });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 100000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-standard-pricing',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const expectedCost =
+      promptTokens * tokenValues['gemini-3.1'].prompt +
+      completionTokens * tokenValues['gemini-3.1'].completion;
+
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  it('should charge premium rates for gemini-3.1-pro-preview when prompt tokens exceed threshold', async () => {
+    const initialBalance = 100000000;
+    await Balance.create({
+      user: userId,
+      tokenCredits: initialBalance,
+    });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-premium-pricing',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const expectedCost =
+      promptTokens * premiumTokenValues['gemini-3.1'].prompt +
+      completionTokens * premiumTokenValues['gemini-3.1'].completion;
+
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  it('should charge premium rates for gemini-3.1-pro-preview-customtools when prompt tokens exceed threshold', async () => {
+    const initialBalance = 100000000;
+    await Balance.create({
+      user: userId,
+      tokenCredits: initialBalance,
+    });
+
+    const model = 'gemini-3.1-pro-preview-customtools';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-customtools-premium',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const expectedCost =
+      promptTokens * premiumTokenValues['gemini-3.1'].prompt +
+      completionTokens * premiumTokenValues['gemini-3.1'].completion;
+
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  it('should charge premium rates for structured gemini-3.1 tokens when total input exceeds threshold', async () => {
+    const initialBalance = 100000000;
+    await Balance.create({
+      user: userId,
+      tokenCredits: initialBalance,
+    });
+
+    const model = 'gemini-3.1-pro-preview';
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-structured-premium',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+    };
+
+    const tokenUsage = {
+      promptTokens: {
+        input: 200000,
+        write: 10000,
+        read: 5000,
+      },
+      completionTokens: 1000,
+    };
+
+    const result = await spendStructuredTokens(txData, tokenUsage);
+
+    const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+    const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+    const writeRate = getCacheMultiplier({ model, cacheType: 'write' });
+    const readRate = getCacheMultiplier({ model, cacheType: 'read' });
+
+    const expectedPromptCost =
+      tokenUsage.promptTokens.input * premiumPromptRate +
+      tokenUsage.promptTokens.write * writeRate +
+      tokenUsage.promptTokens.read * readRate;
+    const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
+
+    expect(result.prompt.prompt).toBeCloseTo(-expectedPromptCost, 0);
+    expect(result.completion.completion).toBeCloseTo(-expectedCompletionCost, 0);
+  });
+
   it('should not apply premium pricing to non-premium models regardless of prompt size', async () => {
     const initialBalance = 100000000;
     await Balance.create({

api/models/tx.js

Lines changed: 4 additions & 0 deletions
@@ -200,6 +200,7 @@ const tokenValues = Object.assign(
   'gemini-2.5-flash-image': { prompt: 0.15, completion: 30 },
   'gemini-3': { prompt: 2, completion: 12 },
   'gemini-3-pro-image': { prompt: 2, completion: 120 },
+  'gemini-3.1': { prompt: 2, completion: 12 },
   'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
   grok: { prompt: 2.0, completion: 10.0 }, // Base pattern defaults to grok-2
   'grok-beta': { prompt: 5.0, completion: 15.0 },
@@ -330,6 +331,8 @@ const cacheTokenValues = {
   'kimi-k2-0711-preview': { write: 0.6, read: 0.15 },
   'kimi-k2-thinking': { write: 0.6, read: 0.15 },
   'kimi-k2-thinking-turbo': { write: 1.15, read: 0.15 },
+  // Gemini 3.1 models - cache read: $0.20/1M (<=200k), cache write: standard input price
+  'gemini-3.1': { write: 2, read: 0.2 },
 };

 /**
@@ -340,6 +343,7 @@ const cacheTokenValues = {
 const premiumTokenValues = {
   'claude-opus-4-6': { threshold: 200000, prompt: 10, completion: 37.5 },
   'claude-sonnet-4-6': { threshold: 200000, prompt: 6, completion: 22.5 },
+  'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 },
 };

 /**
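
As a quick sanity check on the new table entries, this is the arithmetic the tests above assert, in the same token-credit units they use (token count × per-million rate). The cache write/read multipliers for the structured case are taken from the new cacheTokenValues entry for 'gemini-3.1'.

// Standard case from the tests: 100,000 prompt + 500 completion tokens (below the 200k threshold).
const standardCost = 100000 * 2 + 500 * 12; // 206,000 token credits

// Premium case: 250,000 prompt + 500 completion tokens (above the threshold).
const premiumCost = 250000 * 4 + 500 * 18; // 1,009,000 token credits

// Structured case: 200k input + 10k cache write + 5k cache read (215k total, above the threshold),
// plus 1,000 completion tokens, using write = 2 and read = 0.2 from cacheTokenValues['gemini-3.1'].
const structuredPromptCost = 200000 * 4 + 10000 * 2 + 5000 * 0.2; // 821,000
const structuredCompletionCost = 1000 * 18; // 18,000

console.log(standardCost, premiumCost, structuredPromptCost + structuredCompletionCost);
// 206000 1009000 839000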
