1
1
/*
2
2
This is a basic rate limitation for free and metered usage of LLMs.
3
- - any call must be identified by an account (we had by a token , but it got abused)
3
+ - any call must be identified by an account (we used to allow identification by just a cookie ID, but that got abused, hence noAccount=0)
4
4
- There is a distinction between "cocalc.com" and "on-prem":
5
5
- cocalc.com has some models (the more expensive ones) which are metered per token and some which are free
6
6
- on-prem: there is only rate limiting, no metered usage
@@ -11,7 +11,7 @@ This is a basic rate limitation for free and metered usage of LLMs.
11
11
12
12
import { isObject } from "lodash" ;
13
13
14
- import { newCounter , newGauge } from "@cocalc/backend/metrics" ;
14
+ import { newCounter , newGauge , newHistogram } from "@cocalc/backend/metrics" ;
15
15
import { process_env_int } from "@cocalc/backend/misc" ;
16
16
import getPool , { CacheTime } from "@cocalc/database/pool" ;
17
17
import { getServerSettings } from "@cocalc/database/settings" ;
@@ -30,6 +30,7 @@ import {
30
30
} from "@cocalc/util/db-schema/llm-utils" ;
31
31
import { KUCALC_COCALC_COM } from "@cocalc/util/db-schema/site-defaults" ;
32
32
import { isValidUUID } from "@cocalc/util/misc" ;
33
+ import isValidAccount from "../accounts/is-valid-account" ;
33
34
34
35
// These are tokens over a given period of time – summed by account/analytics_cookie or global.
35
36
const QUOTAS = {
@@ -38,13 +39,20 @@ const QUOTAS = {
38
39
global : process_env_int ( "COCALC_LLM_QUOTA_GLOBAL" , 10 ** 6 ) ,
39
40
} as const ;
40
41
41
- const prom_quotas = newGauge (
42
+ const prom_quota_global = newGauge (
42
43
"llm" ,
43
- "abuse_usage_pct " ,
44
- "Language model abuse, 0 to 100 percent of limit" ,
44
+ "abuse_usage_global_pct " ,
45
+ "Language model abuse limit, global, 0 to 100 percent of limit, rounded " ,
45
46
[ "quota" ] ,
46
47
) ;
47
48
49
+ const prom_quota_per_account = newHistogram (
50
+ "llm" ,
51
+ "abuse_usage_account_pct" ,
52
+ "Language model usage per account, to see if users reach certain thresholds for their account usage." ,
53
+ { buckets : [ 25 , 50 , 75 , 100 , 110 ] } ,
54
+ ) ;
55
+
48
56
const prom_rejected = newCounter (
49
57
"llm" ,
50
58
"abuse_rejected_total" ,
@@ -85,7 +93,6 @@ export async function checkForAbuse({
85
93
( await getServerSettings ( ) ) . kucalc === KUCALC_COCALC_COM ;
86
94
87
95
if ( ! isFreeModel ( model , is_cocalc_com ) ) {
88
- // we exclude Ollama (string), because it is free.
89
96
const service = model2service ( model ) as LanguageServiceCore ;
90
97
// This is a for-pay product, so let's make sure user can purchase it.
91
98
await assertPurchaseAllowed ( { account_id, service } ) ;
@@ -103,7 +110,9 @@ export async function checkForAbuse({
103
110
analytics_cookie,
104
111
} ) ;
105
112
106
- prom_quotas . labels ( "account" ) . set ( 100 * ( usage / QUOTAS . account ) ) ;
113
+ // this fluctuates for each account, so we tally up how often users end up in certain usage buckets;
114
+ // a histogram records that distribution, whereas a single gauge would be overwritten by every call
115
+ prom_quota_per_account . observe ( 100 * ( usage / QUOTAS . account ) ) ;
107
116
108
117
// console.log("usage = ", usage);
109
118
if ( account_id ) {
@@ -127,8 +136,9 @@ export async function checkForAbuse({
127
136
// Prevent more sophisticated abuse, e.g., changing analytics_cookie or account frequently,
128
137
// or just a general huge surge in usage.
129
138
const overallUsage = await recentUsage ( { cache : "long" , period : "1 hour" } ) ;
130
- prom_quotas . labels ( "global" ) . set ( 100 * ( overallUsage / QUOTAS . global ) ) ;
131
- // console.log("overallUsage = ", usage);
139
+ prom_quota_global
140
+ . labels ( "global" )
141
+ . set ( Math . round ( 100 * ( overallUsage / QUOTAS . global ) ) ) ;
132
142
if ( overallUsage > QUOTAS . global ) {
133
143
prom_rejected . labels ( "global" ) . inc ( ) ;
134
144
throw new Error (
@@ -156,11 +166,7 @@ async function recentUsage({
156
166
const pool = getPool ( cache ) ;
157
167
let query , args ;
158
168
if ( account_id ) {
159
- const { rows } = await pool . query (
160
- "SELECT COUNT(*) FROM accounts WHERE account_id=$1" ,
161
- [ account_id ] ,
162
- ) ;
163
- if ( rows . length == 0 ) {
169
+ if ( ! ( await isValidAccount ( account_id ) ) ) {
164
170
throw Error ( `invalid account_id ${ account_id } ` ) ;
165
171
}
166
172
query = `SELECT SUM(total_tokens) AS usage FROM openai_chatgpt_log WHERE account_id=$1 AND time >= NOW() - INTERVAL '${ period } '` ;
0 commit comments