1
1
/*
2
- We initially just implement some very simple rate limitations to prevent very
3
- blatant abuse.
4
-
5
- - at most $10^5$ tokens per signed in user per hour \(that's \$0.20\); that allows for major usage...
6
- but if somebody tried to do something really abusive, it would stop it. Nobody
7
- would hit this in practice unless they are really trying to abuse cocalc...
8
- WRONG: it's very easy to hit this due to large inputs, e.g., analyzing a paper.
9
- - at most $10^6$ tokens per hour across all users \-\- that's \$2/hour. That would
10
- come out to a bit more if sustained than my budget, but allows for bursts.
11
-
12
- See https://help.openai.com/en/articles/7039783-chatgpt-api-faq for the upstream rate limits,
13
- where they limit per minute, not per hour (like below):
14
-
15
- What's the rate limits for the ChatGPT API?
16
-
17
- Free trial users: 20 RPM 40000 TPM
18
- Pay-as-you-go users (first 48 hours): 60 RPM 60000 TPM
19
- Pay-as-you-go users (after 48 hours): 3500 RPM 90000 TPM
20
-
21
- RPM = requests per minute
22
- TPM = tokens per minute
2
+ This is a basic rate limitation for free and metered usage of LLMs.
3
+ - any call must be identified by an account (we previously also allowed identification by just a cookie ID, but that got abused, hence noAccount=0)
4
+ - There is a distinction between "cocalc.com" and "on-prem":
5
+ - cocalc.com has some models (the more expensive ones) which are metered per token and some which are free
6
+ - on-prem: there is only rate limiting, no metered usage
7
+ - quotas are adjustable
8
+ - at its core, this should limit individual users from too much free usage, and cap the overall usage
9
+ - monitoring as necessary, to give feedback for tweaking the parameters
23
10
*/
24
11
25
- import { newCounter , newHistogram } from "@cocalc/backend/metrics" ;
12
+ import { isObject } from "lodash" ;
13
+
14
+ import { newCounter , newGauge , newHistogram } from "@cocalc/backend/metrics" ;
26
15
import { process_env_int } from "@cocalc/backend/misc" ;
27
16
import getPool , { CacheTime } from "@cocalc/database/pool" ;
28
17
import { getServerSettings } from "@cocalc/database/settings" ;
@@ -41,7 +30,7 @@ import {
41
30
} from "@cocalc/util/db-schema/llm-utils" ;
42
31
import { KUCALC_COCALC_COM } from "@cocalc/util/db-schema/site-defaults" ;
43
32
import { isValidUUID } from "@cocalc/util/misc" ;
44
- import { isObject } from "lodash " ;
33
+ import isValidAccount from "../accounts/is-valid-account " ;
45
34
46
35
// These are tokens over a given period of time – summed by account/analytics_cookie or global.
47
36
const QUOTAS = {
@@ -50,18 +39,18 @@ const QUOTAS = {
50
39
global : process_env_int ( "COCALC_LLM_QUOTA_GLOBAL" , 10 ** 6 ) ,
51
40
} as const ;
52
41
53
- const prom_quotas = newHistogram (
42
+ const prom_quota_global = newGauge (
43
+ "llm" ,
44
+ "abuse_usage_global_pct" ,
45
+ "Language model abuse limit, global, 0 to 100 percent of limit, rounded" ,
46
+ [ "quota" ] ,
47
+ ) ;
48
+
49
+ const prom_quota_per_account = newHistogram (
54
50
"llm" ,
55
- "abuse_usage" ,
56
- "Language model abuse usage" ,
57
- {
58
- buckets :
59
- // 10 buckets evenly spaced from 0 to QUOTAS.global
60
- Array . from ( { length : 10 } , ( _ , i ) =>
61
- Math . floor ( ( i * QUOTAS . global ) / 10 ) ,
62
- ) ,
63
- labels : [ "usage" ] ,
64
- } ,
51
+ "abuse_usage_account_pct" ,
52
+ "Language model usage per account, to see if users reach certain thresholds for their account usage." ,
53
+ { buckets : [ 25 , 50 , 75 , 100 , 110 ] } ,
65
54
) ;
66
55
67
56
const prom_rejected = newCounter (
@@ -104,7 +93,6 @@ export async function checkForAbuse({
104
93
( await getServerSettings ( ) ) . kucalc === KUCALC_COCALC_COM ;
105
94
106
95
if ( ! isFreeModel ( model , is_cocalc_com ) ) {
107
- // we exclude Ollama (string), because it is free.
108
96
const service = model2service ( model ) as LanguageServiceCore ;
109
97
// This is a for-pay product, so let's make sure user can purchase it.
110
98
await assertPurchaseAllowed ( { account_id, service } ) ;
@@ -122,7 +110,9 @@ export async function checkForAbuse({
122
110
analytics_cookie,
123
111
} ) ;
124
112
125
- prom_quotas . labels ( "recent" ) . observe ( usage ) ;
113
+ // this fluctuates for each account, we'll tally up how often users end up in certain usage buckets
114
+ // that's more explicit than a histogram
115
+ prom_quota_per_account . observe ( 100 * ( usage / QUOTAS . account ) ) ;
126
116
127
117
// console.log("usage = ", usage);
128
118
if ( account_id ) {
@@ -146,8 +136,9 @@ export async function checkForAbuse({
146
136
// Prevent more sophisticated abuse, e.g., changing analytics_cookie or account frequently,
147
137
// or just a general huge surge in usage.
148
138
const overallUsage = await recentUsage ( { cache : "long" , period : "1 hour" } ) ;
149
- prom_quotas . labels ( "global" ) . observe ( overallUsage ) ;
150
- // console.log("overallUsage = ", usage);
139
+ prom_quota_global
140
+ . labels ( "global" )
141
+ . set ( Math . round ( 100 * ( overallUsage / QUOTAS . global ) ) ) ;
151
142
if ( overallUsage > QUOTAS . global ) {
152
143
prom_rejected . labels ( "global" ) . inc ( ) ;
153
144
throw new Error (
@@ -175,11 +166,7 @@ async function recentUsage({
175
166
const pool = getPool ( cache ) ;
176
167
let query , args ;
177
168
if ( account_id ) {
178
- const { rows } = await pool . query (
179
- "SELECT COUNT(*) FROM accounts WHERE account_id=$1" ,
180
- [ account_id ] ,
181
- ) ;
182
- if ( rows . length == 0 ) {
169
+ if ( ! ( await isValidAccount ( account_id ) ) ) {
183
170
throw Error ( `invalid account_id ${ account_id } ` ) ;
184
171
}
185
172
query = `SELECT SUM(total_tokens) AS usage FROM openai_chatgpt_log WHERE account_id=$1 AND time >= NOW() - INTERVAL '${ period } '` ;
0 commit comments