1
1
/*
2
- We initially just implement some very simple rate limitations to prevent very
3
- blatant abuse.
4
-
5
- - at most $10^5$ tokens per signed in user per hour \(that's \$0.20\); that allows for major usage...
6
- but if somebody tried to do something really abusive, it would stop it. Nobody
7
- would hit this in practice unless they are really trying to abuse cocalc...
8
- WRONG: it's very easy to hit this due to large inputs, e.g., analyzing a paper.
9
- - at most $10^6$ tokens per hour across all users \-\- that's \$2/hour. That would
10
- come out to a bit more if sustained than my budget, but allows for bursts.
11
-
12
- See https://help.openai.com/en/articles/7039783-chatgpt-api-faq for the upstream rate limits,
13
- where they limit per minute, not per hour (like below):
14
-
15
- What's the rate limits for the ChatGPT API?
16
-
17
- Free trial users: 20 RPM 40000 TPM
18
- Pay-as-you-go users (first 48 hours): 60 RPM 60000 TPM
19
- Pay-as-you-go users (after 48 hours): 3500 RPM 90000 TPM
20
-
21
- RPM = requests per minute
22
- TPM = tokens per minute
2
+ This is a basic rate limitation for free and metered usage of LLMs.
3
+ - any call must be identified by an account (we previously identified calls by a token, but that got abused)
4
+ - There is a distinction between "cocalc.com" and "on-prem":
5
+ - cocalc.com has some models (the more expensive ones) which are metered per token and some which are free
6
+ - on-prem: there is only rate limiting, no metered usage
7
+ - quotas are adjustable
8
+ - at its core, this should limit individual users from too much free usage, and overall cap the usage
9
+ - monitoring as necessary, to give feedback for tweaking the parameters
23
10
*/
24
11
25
- import { newCounter , newHistogram } from "@cocalc/backend/metrics" ;
12
+ import { isObject } from "lodash" ;
13
+
14
+ import { newCounter , newGauge } from "@cocalc/backend/metrics" ;
26
15
import { process_env_int } from "@cocalc/backend/misc" ;
27
16
import getPool , { CacheTime } from "@cocalc/database/pool" ;
28
17
import { getServerSettings } from "@cocalc/database/settings" ;
@@ -41,7 +30,6 @@ import {
41
30
} from "@cocalc/util/db-schema/llm-utils" ;
42
31
import { KUCALC_COCALC_COM } from "@cocalc/util/db-schema/site-defaults" ;
43
32
import { isValidUUID } from "@cocalc/util/misc" ;
44
- import { isObject } from "lodash" ;
45
33
46
34
// These are tokens over a given period of time – summed by account/analytics_cookie or global.
47
35
const QUOTAS = {
@@ -50,18 +38,11 @@ const QUOTAS = {
50
38
global : process_env_int ( "COCALC_LLM_QUOTA_GLOBAL" , 10 ** 6 ) ,
51
39
} as const ;
52
40
53
- const prom_quotas = newHistogram (
41
+ const prom_quotas = newGauge (
54
42
"llm" ,
55
- "abuse_usage" ,
56
- "Language model abuse usage" ,
57
- {
58
- buckets :
59
- // 10 buckets evenly spaced from 0 to QUOTAS.global
60
- Array . from ( { length : 10 } , ( _ , i ) =>
61
- Math . floor ( ( i * QUOTAS . global ) / 10 ) ,
62
- ) ,
63
- labels : [ "usage" ] ,
64
- } ,
43
+ "abuse_usage_pct" ,
44
+ "Language model abuse, 0 to 100 percent of limit" ,
45
+ [ "quota" ] ,
65
46
) ;
66
47
67
48
const prom_rejected = newCounter (
@@ -122,7 +103,7 @@ export async function checkForAbuse({
122
103
analytics_cookie,
123
104
} ) ;
124
105
125
- prom_quotas . labels ( "recent " ) . observe ( usage ) ;
106
+ prom_quotas . labels ( "account " ) . set ( 100 * ( usage / QUOTAS . account ) ) ;
126
107
127
108
// console.log("usage = ", usage);
128
109
if ( account_id ) {
@@ -146,7 +127,7 @@ export async function checkForAbuse({
146
127
// Prevent more sophisticated abuse, e.g., changing analytics_cookie or account frequently,
147
128
// or just a general huge surge in usage.
148
129
const overallUsage = await recentUsage ( { cache : "long" , period : "1 hour" } ) ;
149
- prom_quotas . labels ( "global" ) . observe ( overallUsage ) ;
130
+ prom_quotas . labels ( "global" ) . set ( 100 * ( overallUsage / QUOTAS . global ) ) ;
150
131
// console.log("overallUsage = ", usage);
151
132
if ( overallUsage > QUOTAS . global ) {
152
133
prom_rejected . labels ( "global" ) . inc ( ) ;
0 commit comments