Skip to content

Commit 51e77b9

Browse files
committed
server/llm/abuse: use existing isValidAccount with caching and explicit histogram buckets for accounts
1 parent d12200f commit 51e77b9

File tree

1 file changed

+20
-14
lines changed

1 file changed

+20
-14
lines changed

src/packages/server/llm/abuse.ts

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This is a basic rate limitation for free and metered usage of LLMs.
3-
- any call must be identified by an account (we had by a token, but it got abused)
3+
- any call must be identified by an account (previously a cookie ID alone was sufficient, but that got abused, hence noAccount=0)
44
- There is a distinction between "cocalc.com" and "on-prem":
55
- cocalc.com has some models (the more expensive ones) which are metered per token and some which are free
66
- on-prem: there is only rate limiting, no metered usage
@@ -11,7 +11,7 @@ This is a basic rate limitation for free and metered usage of LLMs.
1111

1212
import { isObject } from "lodash";
1313

14-
import { newCounter, newGauge } from "@cocalc/backend/metrics";
14+
import { newCounter, newGauge, newHistogram } from "@cocalc/backend/metrics";
1515
import { process_env_int } from "@cocalc/backend/misc";
1616
import getPool, { CacheTime } from "@cocalc/database/pool";
1717
import { getServerSettings } from "@cocalc/database/settings";
@@ -30,6 +30,7 @@ import {
3030
} from "@cocalc/util/db-schema/llm-utils";
3131
import { KUCALC_COCALC_COM } from "@cocalc/util/db-schema/site-defaults";
3232
import { isValidUUID } from "@cocalc/util/misc";
33+
import isValidAccount from "../accounts/is-valid-account";
3334

3435
// These are tokens over a given period of time – summed by account/analytics_cookie or global.
3536
const QUOTAS = {
@@ -38,13 +39,20 @@ const QUOTAS = {
3839
global: process_env_int("COCALC_LLM_QUOTA_GLOBAL", 10 ** 6),
3940
} as const;
4041

41-
const prom_quotas = newGauge(
42+
const prom_quota_global = newGauge(
4243
"llm",
43-
"abuse_usage_pct",
44-
"Language model abuse, 0 to 100 percent of limit",
44+
"abuse_usage_global_pct",
45+
"Language model abuse limit, global, 0 to 100 percent of limit, rounded",
4546
["quota"],
4647
);
4748

49+
const prom_quota_per_account = newHistogram(
50+
"llm",
51+
"abuse_usage_account_pct",
52+
"Language model usage per account, to see if users reach certain thresholds for their account usage.",
53+
{ buckets: [25, 50, 75, 100, 110] },
54+
);
55+
4856
const prom_rejected = newCounter(
4957
"llm",
5058
"abuse_rejected_total",
@@ -85,7 +93,6 @@ export async function checkForAbuse({
8593
(await getServerSettings()).kucalc === KUCALC_COCALC_COM;
8694

8795
if (!isFreeModel(model, is_cocalc_com)) {
88-
// we exclude Ollama (string), because it is free.
8996
const service = model2service(model) as LanguageServiceCore;
9097
// This is a for-pay product, so let's make sure user can purchase it.
9198
await assertPurchaseAllowed({ account_id, service });
@@ -103,7 +110,9 @@ export async function checkForAbuse({
103110
analytics_cookie,
104111
});
105112

106-
prom_quotas.labels("account").set(100 * (usage / QUOTAS.account));
113+
// this fluctuates for each account, we'll tally up how often users end up in certain usage buckets
114+
// a histogram with explicit buckets captures that better than a single gauge value
115+
prom_quota_per_account.observe(100 * (usage / QUOTAS.account));
107116

108117
// console.log("usage = ", usage);
109118
if (account_id) {
@@ -127,8 +136,9 @@ export async function checkForAbuse({
127136
// Prevent more sophisticated abuse, e.g., changing analytics_cookie or account frequently,
128137
// or just a general huge surge in usage.
129138
const overallUsage = await recentUsage({ cache: "long", period: "1 hour" });
130-
prom_quotas.labels("global").set(100 * (overallUsage / QUOTAS.global));
131-
// console.log("overallUsage = ", usage);
139+
prom_quota_global
140+
.labels("global")
141+
.set(Math.round(100 * (overallUsage / QUOTAS.global)));
132142
if (overallUsage > QUOTAS.global) {
133143
prom_rejected.labels("global").inc();
134144
throw new Error(
@@ -156,11 +166,7 @@ async function recentUsage({
156166
const pool = getPool(cache);
157167
let query, args;
158168
if (account_id) {
159-
const { rows } = await pool.query(
160-
"SELECT COUNT(*) FROM accounts WHERE account_id=$1",
161-
[account_id],
162-
);
163-
if (rows.length == 0) {
169+
if (!(await isValidAccount(account_id))) {
164170
throw Error(`invalid account_id ${account_id}`);
165171
}
166172
query = `SELECT SUM(total_tokens) AS usage FROM openai_chatgpt_log WHERE account_id=$1 AND time >= NOW() - INTERVAL '${period}'`;

0 commit comments

Comments
 (0)