Skip to content

Commit 9beda5e

Browse files
committed
llm/abuse: change monitoring, rewrite requirements
1 parent 4a4a498 commit 9beda5e

File tree

1 file changed

+17
-36
lines changed

1 file changed

+17
-36
lines changed

src/packages/server/llm/abuse.ts

Lines changed: 17 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,17 @@
11
/*
2-
We initially just implement some very simple rate limitations to prevent very
3-
blatant abuse.
4-
5-
- at most $10^5$ tokens per signed-in user per hour (that's $0.20); that allows for major usage...
6-
but if somebody tried to do something really abusive, it would stop it. Nobody
7-
would hit this in practice unless they are really trying to abuse cocalc...
8-
WRONG: it's very easy to hit this due to large inputs, e.g., analyzing a paper.
9-
- at most $10^6$ tokens per hour across all users -- that's $2/hour. That would
10-
come out to a bit more if sustained than my budget, but allows for bursts.
11-
12-
See https://help.openai.com/en/articles/7039783-chatgpt-api-faq for the upstream rate limits,
13-
where they limit per minute, not per hour (like below):
14-
15-
What's the rate limits for the ChatGPT API?
16-
17-
Free trial users: 20 RPM 40000 TPM
18-
Pay-as-you-go users (first 48 hours): 60 RPM 60000 TPM
19-
Pay-as-you-go users (after 48 hours): 3500 RPM 90000 TPM
20-
21-
RPM = requests per minute
22-
TPM = tokens per minute
2+
This is a basic rate limitation for free and metered usage of LLMs.
3+
- any call must be identified by an account (we previously identified calls by a token, but that got abused)
4+
- There is a distinction between "cocalc.com" and "on-prem":
5+
- cocalc.com has some models (the more expensive ones) which are metered per token and some which are free
6+
- on-prem: there is only rate limiting, no metered usage
7+
- quotas are adjustable
8+
- at its core, this should limit individual users from too much free usage, and overall cap the usage
9+
- monitoring as necessary, to give feedback for tweaking the parameters
2310
*/
2411

25-
import { newCounter, newHistogram } from "@cocalc/backend/metrics";
12+
import { isObject } from "lodash";
13+
14+
import { newCounter, newGauge } from "@cocalc/backend/metrics";
2615
import { process_env_int } from "@cocalc/backend/misc";
2716
import getPool, { CacheTime } from "@cocalc/database/pool";
2817
import { getServerSettings } from "@cocalc/database/settings";
@@ -41,7 +30,6 @@ import {
4130
} from "@cocalc/util/db-schema/llm-utils";
4231
import { KUCALC_COCALC_COM } from "@cocalc/util/db-schema/site-defaults";
4332
import { isValidUUID } from "@cocalc/util/misc";
44-
import { isObject } from "lodash";
4533

4634
// These are tokens over a given period of time – summed by account/analytics_cookie or global.
4735
const QUOTAS = {
@@ -50,18 +38,11 @@ const QUOTAS = {
5038
global: process_env_int("COCALC_LLM_QUOTA_GLOBAL", 10 ** 6),
5139
} as const;
5240

53-
const prom_quotas = newHistogram(
41+
const prom_quotas = newGauge(
5442
"llm",
55-
"abuse_usage",
56-
"Language model abuse usage",
57-
{
58-
buckets:
59-
// 10 buckets evenly spaced from 0 to QUOTAS.global
60-
Array.from({ length: 10 }, (_, i) =>
61-
Math.floor((i * QUOTAS.global) / 10),
62-
),
63-
labels: ["usage"],
64-
},
43+
"abuse_usage_pct",
44+
"Language model abuse, 0 to 100 percent of limit",
45+
["quota"],
6546
);
6647

6748
const prom_rejected = newCounter(
@@ -122,7 +103,7 @@ export async function checkForAbuse({
122103
analytics_cookie,
123104
});
124105

125-
prom_quotas.labels("recent").observe(usage);
106+
prom_quotas.labels("account").set(100 * (usage / QUOTAS.account));
126107

127108
// console.log("usage = ", usage);
128109
if (account_id) {
@@ -146,7 +127,7 @@ export async function checkForAbuse({
146127
// Prevent more sophisticated abuse, e.g., changing analytics_cookie or account frequently,
147128
// or just a general huge surge in usage.
148129
const overallUsage = await recentUsage({ cache: "long", period: "1 hour" });
149-
prom_quotas.labels("global").observe(overallUsage);
130+
prom_quotas.labels("global").set(100 * (overallUsage / QUOTAS.global));
150131
// console.log("overallUsage = ", usage);
151132
if (overallUsage > QUOTAS.global) {
152133
prom_rejected.labels("global").inc();

0 commit comments

Comments
 (0)