Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions apis/cloudflare/src/billing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { type BillingEvent } from "@braintrust/proxy";

const DEFAULT_BILLING_TELEMETRY_URL =
"https://api.braintrust.dev/billing/telemetry/ingest";

function isBrainModel(model: string): boolean {
return model.startsWith("brain-");
Comment on lines +6 to +7
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this guard after testing with gpt-5-mini.

}

/**
 * Translates a proxy BillingEvent into the payload shape expected by the
 * billing telemetry ingest endpoint.
 *
 * Returns null when the event is not billable: missing org_id, model,
 * resolved_model, or token usage (each logged as a warning), or a model
 * that is not Braintrust-hosted (skipped silently by design).
 */
function buildPayloadEvent(event: BillingEvent) {
  if (!event.org_id) {
    console.warn("billing event skipped: missing org_id");
    return null;
  }
  if (!event.model) {
    console.warn("billing event skipped: missing model");
    return null;
  }
  // Skip non-brain models since braintrust only hosts brain models.
  if (!isBrainModel(event.model)) {
    return null;
  }
  if (!event.resolved_model) {
    console.warn("billing event skipped: missing resolved_model");
    return null;
  }
  // At least one token-usage field must be present for the event to be
  // worth billing.
  const tokenFields = [
    event.input_tokens,
    event.output_tokens,
    event.cached_input_tokens,
    event.cache_write_input_tokens,
  ];
  if (tokenFields.every((value) => value === undefined)) {
    console.warn("billing event skipped: missing token usage");
    return null;
  }

  return {
    event_name: "NativeInferenceTokenUsageEvent",
    external_customer_id: event.org_id,
    timestamp: new Date().toISOString(),
    // Fresh UUID per event so the ingest endpoint can deduplicate retries.
    idempotency_key: crypto.randomUUID(),
    properties: {
      model: event.model,
      resolved_model: event.resolved_model,
      org_id: event.org_id,
      input_tokens: event.input_tokens,
      output_tokens: event.output_tokens,
      cached_input_tokens: event.cached_input_tokens,
      cache_write_input_tokens: event.cache_write_input_tokens,
    },
  };
}

/**
 * Fire-and-forget delivery of one billing telemetry event.
 *
 * Builds the ingest payload, POSTs it to `telemetryUrl` (falling back to
 * the default ingest endpoint), and logs — but never throws — on failure,
 * so a billing problem can never break the proxied request path.
 */
export async function sendBillingTelemetryEvent({
  telemetryUrl,
  event,
}: {
  telemetryUrl?: string;
  event: BillingEvent;
}): Promise<void> {
  try {
    const payloadEvent = buildPayloadEvent(event);
    if (payloadEvent === null) {
      // Event was filtered out (non-billable); nothing to send.
      return;
    }

    const destination = telemetryUrl || DEFAULT_BILLING_TELEMETRY_URL;
    const response = await fetch(destination, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${event.auth_token}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ events: [payloadEvent] }),
    });

    if (response.ok) {
      return;
    }
    // Surface the server's response body to aid debugging of rejected events.
    const responseBody = await response.text();
    console.warn(
      `billing event failed: ${response.status} ${response.statusText} ${responseBody}`,
    );
  } catch (error) {
    console.warn("billing event threw an error", error);
  }
}
1 change: 1 addition & 0 deletions apis/cloudflare/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ declare global {
BRAINTRUST_APP_URL: string;
WHITELISTED_ORIGINS?: string;
METRICS_LICENSE_KEY?: string;
BILLING_TELEMETRY_URL?: string;
NATIVE_INFERENCE_SECRET_KEY?: string;
}
}
Expand Down
50 changes: 39 additions & 11 deletions apis/cloudflare/src/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { BT_PARENT, resolveParentHeader } from "braintrust/util";
import { cachedLogin, makeProxySpanLogger } from "./tracing";
import { MeterProvider } from "@opentelemetry/sdk-metrics";
import { Meter, Attributes, Histogram } from "@opentelemetry/api";
import { sendBillingTelemetryEvent } from "./billing";

export type LogHistogramFn = (args: {
name: string;
Expand Down Expand Up @@ -117,6 +118,30 @@ export async function handleProxyV1(
let span: Span | undefined;
let spanId: string | undefined;
let spanExport: string | undefined;
let billingOrgId: string | undefined;
const orgName = request.headers.get(ORG_NAME_HEADER) ?? undefined;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Resolve billing org using routed org context

Billing org lookup is keyed only by ORG_NAME_HEADER, but proxy routing can override org from the URL path (/btorg/{org}/... in packages/proxy/src/proxy.ts). In that case, billingOrgId is resolved for the key’s default/header org while the request is actually executed under the path org, so telemetry can be attributed to the wrong external_customer_id and corrupt billing for multi-org keys.

Useful? React with 👍 / 👎.

const apiKey =
parseAuthHeader({
authorization: request.headers.get("authorization") ?? undefined,
}) ?? undefined;
Comment on lines +123 to +126
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Accept x-api-key when deriving billing org id

This API key extraction only reads Authorization, but proxyV1 also supports x-api-key authentication. For clients that authenticate via x-api-key (common in Anthropic-compatible flows), billingOrgId remains unset and buildPayloadEvent skips the telemetry because org_id is missing, so native inference usage is silently not billed.

Useful? React with 👍 / 👎.


const getLoginState = async () =>
cachedLogin({
appUrl: braintrustAppUrl(env).toString(),
apiKey,
orgName,
cache: credentialsCache,
});

if (apiKey) {
try {
const loginState = await getLoginState();
billingOrgId = loginState.orgId ?? undefined;
} catch (error) {
console.warn("Failed to resolve billing org id", error);
}
}

const parentHeader = request.headers.get(BT_PARENT);
if (parentHeader) {
let parent;
Expand All @@ -131,19 +156,11 @@ export async function handleProxyV1(
);
}

const orgName = request.headers.get(ORG_NAME_HEADER) ?? undefined;
const apiKey =
parseAuthHeader({
authorization: request.headers.get("authorization") ?? undefined,
}) ?? undefined;
const loginState = await getLoginState();
billingOrgId = loginState.orgId ?? undefined;

span = startSpan({
state: await cachedLogin({
appUrl: braintrustAppUrl(env).toString(),
apiKey,
orgName,
cache: credentialsCache,
}),
state: loginState,
type: "llm",
name: "LLM",
parent: parent.toStr(),
Expand Down Expand Up @@ -199,6 +216,17 @@ export async function handleProxyV1(
spanLogger,
spanId,
spanExport,
billingOrgId,
onBillingEvent: (event) => {
ctx.waitUntil(
sendBillingTelemetryEvent({
telemetryUrl: env.BILLING_TELEMETRY_URL,
event,
}).catch((error) => {
console.warn("billing waitUntil task failed", error);
}),
);
},
nativeInferenceSecretKey: env.NATIVE_INFERENCE_SECRET_KEY,
};

Expand Down
2 changes: 2 additions & 0 deletions apis/cloudflare/wrangler-template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ head_sampling_rate = 0.2
# You should not need to edit this
BRAINTRUST_APP_URL = "https://www.braintrust.dev"
METRICS_LICENSE_KEY="<YOUR_METRICS_LICENSE_KEY>"
BILLING_TELEMETRY_URL="https://api.braintrust.dev/billing/telemetry/ingest"
Copy link
Copy Markdown
Contributor Author

@knjiang Ken Jiang (knjiang) Apr 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this how you configure the proxy?


[env.staging.vars]
BRAINTRUST_APP_URL = "https://www.braintrust.dev"
METRICS_LICENSE_KEY="<YOUR_METRICS_LICENSE_KEY>"
BILLING_TELEMETRY_URL="https://api.braintrust.dev/billing/telemetry/ingest"

[env.staging]
kv_namespaces = [
Expand Down
6 changes: 5 additions & 1 deletion packages/proxy/edge/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { DEFAULT_BRAINTRUST_APP_URL } from "@lib/constants";
import { flushMetrics } from "@lib/metrics";
import { proxyV1, SpanLogger, LogHistogramFn } from "@lib/proxy";
import { proxyV1, SpanLogger, LogHistogramFn, BillingEvent } from "@lib/proxy";
import { isEmpty } from "@lib/util";
import { MeterProvider } from "@opentelemetry/sdk-metrics";

Expand Down Expand Up @@ -36,6 +36,8 @@ export interface ProxyOpts {
logHistogram?: LogHistogramFn;
whitelist?: (string | RegExp)[];
spanLogger?: SpanLogger;
billingOrgId?: string;
onBillingEvent?: (event: BillingEvent) => void;
spanId?: string;
spanExport?: string;
nativeInferenceSecretKey?: string;
Expand Down Expand Up @@ -398,6 +400,8 @@ export function EdgeProxyV1(opts: ProxyOpts) {
digest: digestMessage,
logHistogram: opts.logHistogram,
spanLogger: opts.spanLogger,
billingOrgId: opts.billingOrgId,
onBillingEvent: opts.onBillingEvent,
});
} catch (e) {
return new Response(`${e}`, {
Expand Down
74 changes: 74 additions & 0 deletions packages/proxy/src/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,19 @@ export interface SpanLogger {
reportProgress: (progress: string) => void;
}

// Payload handed to the `onBillingEvent` callback by proxyV1 after a
// response that did not fail. Token fields are copied from the provider's
// usage block when present and may be undefined when the upstream response
// did not report them.
export type BillingEvent = {
  event_name: "NativeInferenceTokenUsageEvent";
  // Caller's auth token; forwarded as a Bearer token to the telemetry
  // ingest endpoint.
  auth_token: string;
  // Org id used as the external customer id for billing attribution.
  org_id?: string;
  // Model name as requested by the client.
  model?: string | null;
  // Model name as reported back by the provider's response.
  resolved_model?: string | null;
  org_name?: string;
  input_tokens?: number;
  output_tokens?: number;
  cached_input_tokens?: number;
  cache_write_input_tokens?: number;
};

// This is an isomorphic implementation of proxyV1, which is used by both edge functions
// in CloudFlare and by the node proxy (locally and in lambda).
export async function proxyV1({
Expand All @@ -208,6 +221,8 @@ export async function proxyV1({
cacheKeyOptions = {},
decompressFetch = false,
spanLogger,
billingOrgId,
onBillingEvent,
signal,
fetch = globalThis.fetch,
}: {
Expand Down Expand Up @@ -237,6 +252,8 @@ export async function proxyV1({
cacheKeyOptions?: CacheKeyOptions;
decompressFetch?: boolean;
spanLogger?: SpanLogger;
billingOrgId?: string;
onBillingEvent?: (event: BillingEvent) => void;
signal?: AbortSignal;
fetch?: FetchFn;
}): Promise<void> {
Expand Down Expand Up @@ -299,6 +316,7 @@ export async function proxyV1({
);

let orgName: string | undefined = proxyHeaders[ORG_NAME_HEADER] ?? undefined;
let resolvedOrgName: string | undefined = orgName;
const projectId: string | undefined =
proxyHeaders[PROJECT_ID_HEADER] ?? undefined;

Expand Down Expand Up @@ -649,6 +667,7 @@ export async function proxyV1({

if (secrets.length > 0 && !orgName && secrets[0].org_name) {
baseAttributes.org_name = secrets[0].org_name;
resolvedOrgName = secrets[0].org_name;
}
logRequest();

Expand Down Expand Up @@ -759,6 +778,11 @@ export async function proxyV1({
if (stream) {
let first = true;
const allChunks: Uint8Array[] = [];
let resolvedModel: string | undefined = undefined;
let inputTokens: number | undefined = undefined;
let outputTokens: number | undefined = undefined;
let cachedInputTokens: number | undefined = undefined;
let cacheWriteInputTokens: number | undefined = undefined;

// These parameters are for the streaming case
let reasoning: OpenAIReasoning[] | undefined = undefined;
Expand Down Expand Up @@ -787,10 +811,20 @@ export async function proxyV1({
| OpenAIChatCompletionChunk
| undefined;
if (result) {
if (typeof result.model === "string" && result.model) {
resolvedModel = result.model;
}
const extendedUsage = completionUsageSchema.safeParse(
result.usage,
);
if (extendedUsage.success) {
inputTokens = extendedUsage.data.prompt_tokens;
outputTokens = extendedUsage.data.completion_tokens;
cachedInputTokens =
extendedUsage.data.prompt_tokens_details?.cached_tokens;
cacheWriteInputTokens =
extendedUsage.data.prompt_tokens_details
?.cache_creation_tokens;
spanLogger?.log({
metrics: {
tokens: extendedUsage.data.total_tokens,
Expand Down Expand Up @@ -978,10 +1012,20 @@ export async function proxyV1({
case "chat":
case "completion": {
const data = dataRaw as ChatCompletion;
if (typeof data.model === "string" && data.model) {
resolvedModel = data.model;
}
const extendedUsage = completionUsageSchema.safeParse(
data.usage,
);
if (extendedUsage.success) {
inputTokens = extendedUsage.data.prompt_tokens;
outputTokens = extendedUsage.data.completion_tokens;
cachedInputTokens =
extendedUsage.data.prompt_tokens_details?.cached_tokens;
cacheWriteInputTokens =
extendedUsage.data.prompt_tokens_details
?.cache_creation_tokens;
spanLogger?.log({
output: data.choices,
metrics: {
Expand Down Expand Up @@ -1041,6 +1085,15 @@ export async function proxyV1({
}
case "response": {
const data = dataRaw as OpenAIResponse;
if (typeof data.model === "string" && data.model) {
resolvedModel = data.model;
}
if (data.usage) {
inputTokens = data.usage.input_tokens;
outputTokens = data.usage.output_tokens;
cachedInputTokens =
data.usage.input_tokens_details?.cached_tokens;
}
spanLogger?.log({
output: data.output,
metrics: {
Expand Down Expand Up @@ -1089,6 +1142,27 @@ export async function proxyV1({
});

spanLogger?.end();
if (!responseFailed) {
try {
if (typeof onBillingEvent !== "function") {
return;
}
onBillingEvent({
event_name: "NativeInferenceTokenUsageEvent",
auth_token: authToken,
org_id: billingOrgId,
model,
resolved_model: resolvedModel,
org_name: resolvedOrgName,
input_tokens: inputTokens,
output_tokens: outputTokens,
cached_input_tokens: cachedInputTokens,
cache_write_input_tokens: cacheWriteInputTokens,
});
} catch (error) {
console.warn("billing callback failed", error);
}
}
controller.terminate();
},
});
Expand Down
Loading