
Commit dce07fe

Merge pull request #7643 from sagemathinc/llm-tweak-abuse
llm/abuse improvements
2 parents 4e2f8c0 + 51e77b9 commit dce07fe


src/packages/server/llm/abuse.ts

Lines changed: 30 additions & 43 deletions
@@ -1,28 +1,17 @@
 /*
-We initially just implement some very simple rate limitations to prevent very
-blatant abuse.
-
-- at most 10^5 tokens per signed in user per hour (that's $0.20); that allows for major usage...
-  but if somebody tried to do something really abusive, it would stop it. Nobody
-  would hit this in practice unless they are really trying to abuse cocalc...
-  WRONG: it's very easy to hit this due to large inputs, e.g., analyzing a paper.
-- at most 10^6 tokens per hour across all users -- that's $2/hour. That would
-  come out to a bit more if sustained than my budget, but allows for bursts.
-
-See https://help.openai.com/en/articles/7039783-chatgpt-api-faq for the upstream rate limits,
-where they limit per minute, not per hour (like below):
-
-What's the rate limits for the ChatGPT API?
-
-Free trial users: 20 RPM 40000 TPM
-Pay-as-you-go users (first 48 hours): 60 RPM 60000 TPM
-Pay-as-you-go users (after 48 hours): 3500 RPM 90000 TPM
-
-RPM = requests per minute
-TPM = tokens per minute
+This is a basic rate limitation for free and metered usage of LLMs.
+- any call must be identified by an account (we had just a cookie ID, but it got abused, hence noAccount=0)
+- There is a distinction between "cocalc.com" and "on-prem":
+  - cocalc.com has some models (the more expensive ones) which are metered per token, and some which are free
+  - on-prem: there is only rate limiting, no metered usage
+- quotas are adjustable
+- at its core, this should limit individual users from too much free usage, and overall cap the usage
+- monitoring as necessary, to give feedback for tweaking the parameters
 */

-import { newCounter, newHistogram } from "@cocalc/backend/metrics";
+import { isObject } from "lodash";
+
+import { newCounter, newGauge, newHistogram } from "@cocalc/backend/metrics";
 import { process_env_int } from "@cocalc/backend/misc";
 import getPool, { CacheTime } from "@cocalc/database/pool";
 import { getServerSettings } from "@cocalc/database/settings";
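The "quotas are adjustable" bullet refers to environment overrides such as COCALC_LLM_QUOTA_GLOBAL in the QUOTAS definition below. As a rough sketch of what a helper like process_env_int presumably does (the real implementation lives in @cocalc/backend/misc and may differ in details):

    // Sketch only: read an integer override from the environment, else use the default.
    function process_env_int(name: string, fallback: number): number {
      const raw = process.env[name];
      if (raw == null) return fallback;
      const parsed = parseInt(raw, 10);
      return Number.isFinite(parsed) ? parsed : fallback;
    }

    // e.g. COCALC_LLM_QUOTA_GLOBAL=2000000 would double the default global cap of 10 ** 6 tokens/hour.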
@@ -41,7 +30,7 @@ import {
 } from "@cocalc/util/db-schema/llm-utils";
 import { KUCALC_COCALC_COM } from "@cocalc/util/db-schema/site-defaults";
 import { isValidUUID } from "@cocalc/util/misc";
-import { isObject } from "lodash";
+import isValidAccount from "../accounts/is-valid-account";

 // These are tokens over a given period of time – summed by account/analytics_cookie or global.
 const QUOTAS = {
@@ -50,18 +39,18 @@ const QUOTAS = {
   global: process_env_int("COCALC_LLM_QUOTA_GLOBAL", 10 ** 6),
 } as const;

-const prom_quotas = newHistogram(
+const prom_quota_global = newGauge(
+  "llm",
+  "abuse_usage_global_pct",
+  "Language model abuse limit, global, 0 to 100 percent of limit, rounded",
+  ["quota"],
+);
+
+const prom_quota_per_account = newHistogram(
   "llm",
-  "abuse_usage",
-  "Language model abuse usage",
-  {
-    buckets:
-      // 10 buckets evenly spaced from 0 to QUOTAS.global
-      Array.from({ length: 10 }, (_, i) =>
-        Math.floor((i * QUOTAS.global) / 10),
-      ),
-    labels: ["usage"],
-  },
+  "abuse_usage_account_pct",
+  "Language model usage per account, to see if users reach certain thresholds for their account usage.",
+  { buckets: [25, 50, 75, 100, 110] },
 );

 const prom_rejected = newCounter(
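For readers less familiar with Prometheus metric types: the rewrite splits one histogram into a gauge (a single current value, suited to "how close is global usage to the cap right now") and a percent-bucket histogram (a distribution, suited to "how often do accounts reach 25/50/75/100/110% of their quota"). Expressed directly with prom-client, which the newGauge/newHistogram helpers presumably wrap (the metric names and wiring here are assumptions):

    import { Gauge, Histogram } from "prom-client";

    // Gauge: one current value, the global usage as a rounded percent of the cap.
    const quotaGlobal = new Gauge({
      name: "llm_abuse_usage_global_pct",
      help: "Language model abuse limit, global, 0 to 100 percent of limit, rounded",
      labelNames: ["quota"],
    });

    // Histogram: counts observations per bucket, i.e., how often accounts sit at
    // up to 25%, 50%, 75%, 100% of their quota, plus a 110% overflow bucket.
    const quotaPerAccount = new Histogram({
      name: "llm_abuse_usage_account_pct",
      help: "Language model usage per account, percent of per-account quota",
      buckets: [25, 50, 75, 100, 110],
    });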
@@ -104,7 +93,6 @@ export async function checkForAbuse({
     (await getServerSettings()).kucalc === KUCALC_COCALC_COM;

   if (!isFreeModel(model, is_cocalc_com)) {
-    // we exclude Ollama (string), because it is free.
     const service = model2service(model) as LanguageServiceCore;
     // This is a for-pay product, so let's make sure user can purchase it.
     await assertPurchaseAllowed({ account_id, service });
@@ -122,7 +110,9 @@ export async function checkForAbuse({
     analytics_cookie,
   });

-  prom_quotas.labels("recent").observe(usage);
+  // this fluctuates for each account, so we tally up how often users end up in certain
+  // usage buckets; that's more explicit than a histogram over raw token counts
+  prom_quota_per_account.observe(100 * (usage / QUOTAS.account));

   // console.log("usage = ", usage);
   if (account_id) {
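Since Prometheus histogram buckets are cumulative upper bounds, each observation increments every bucket at or above it. A worked example of the arithmetic, assuming the per-account quota default of 10 ** 5 tokens mentioned in the removed header comment:

    const QUOTAS = { account: 10 ** 5, global: 10 ** 6 } as const; // assumed defaults

    const usage = 60_000; // tokens this account used in the current window
    const pct = 100 * (usage / QUOTAS.account); // 60
    // prom_quota_per_account.observe(pct) increments the cumulative buckets
    // le=75, le=100, le=110, and +Inf, but not le=25 or le=50.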
@@ -146,8 +136,9 @@ export async function checkForAbuse({
   // Prevent more sophisticated abuse, e.g., changing analytics_cookie or account frequently,
   // or just a general huge surge in usage.
   const overallUsage = await recentUsage({ cache: "long", period: "1 hour" });
-  prom_quotas.labels("global").observe(overallUsage);
-  // console.log("overallUsage = ", usage);
+  prom_quota_global
+    .labels("global")
+    .set(Math.round(100 * (overallUsage / QUOTAS.global)));
   if (overallUsage > QUOTAS.global) {
     prom_rejected.labels("global").inc();
     throw new Error(
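The switch from observe() to set() is the substantive change here: the removed code fed a fluctuating hourly token sum into histogram buckets spaced over 0..QUOTAS.global, which mostly recorded where the sum happened to sit at each check, whereas the gauge reports the current percentage of the cap directly. Illustrative numbers:

    // With overallUsage = 420_000 tokens in the past hour and QUOTAS.global = 10 ** 6:
    Math.round(100 * (420_000 / 10 ** 6)); // 42, so the gauge reads 42 (percent of the cap)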
@@ -175,11 +166,7 @@ async function recentUsage({
   const pool = getPool(cache);
   let query, args;
   if (account_id) {
-    const { rows } = await pool.query(
-      "SELECT COUNT(*) FROM accounts WHERE account_id=$1",
-      [account_id],
-    );
-    if (rows.length == 0) {
+    if (!(await isValidAccount(account_id))) {
       throw Error(`invalid account_id ${account_id}`);
     }
     query = `SELECT SUM(total_tokens) AS usage FROM openai_chatgpt_log WHERE account_id=$1 AND time >= NOW() - INTERVAL '${period}'`;
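The inline COUNT(*) query is replaced by the shared isValidAccount helper; note the removed check was also broken, since SELECT COUNT(*) always returns exactly one row, so rows.length == 0 could never fire. Judging from the import path, the helper is presumably shaped roughly like this (a sketch, not the actual module, which may also cache lookups):

    // Hypothetical sketch of ../accounts/is-valid-account
    import getPool from "@cocalc/database/pool";

    export default async function isValidAccount(
      account_id: string,
    ): Promise<boolean> {
      const pool = getPool();
      const { rows } = await pool.query(
        "SELECT COUNT(*) AS count FROM accounts WHERE account_id=$1",
        [account_id],
      );
      return parseInt(rows[0].count) > 0; // pg returns COUNT(*) as a string
    }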
