-
Notifications
You must be signed in to change notification settings - Fork 61
Native inference billing #433
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| import { type BillingEvent } from "@braintrust/proxy"; | ||
|
|
||
| const DEFAULT_BILLING_TELEMETRY_URL = | ||
| "https://api.braintrust.dev/billing/telemetry/ingest"; | ||
|
|
||
| function isBrainModel(model: string): boolean { | ||
| return model.startsWith("brain-"); | ||
| } | ||
|
|
||
| function buildPayloadEvent(event: BillingEvent) { | ||
| if (!event.org_id) { | ||
| console.warn("billing event skipped: missing org_id"); | ||
| return null; | ||
| } | ||
| if (!event.model) { | ||
| console.warn("billing event skipped: missing model"); | ||
| return null; | ||
| } | ||
| // Skip non-brain models since braintrust only hosts brain models. | ||
| if (!isBrainModel(event.model)) { | ||
| return null; | ||
| } | ||
| if (!event.resolved_model) { | ||
| console.warn("billing event skipped: missing resolved_model"); | ||
| return null; | ||
| } | ||
| const hasTokenUsageData = | ||
| event.input_tokens !== undefined || | ||
| event.output_tokens !== undefined || | ||
| event.cached_input_tokens !== undefined || | ||
| event.cache_write_input_tokens !== undefined; | ||
| if (!hasTokenUsageData) { | ||
| console.warn("billing event skipped: missing token usage"); | ||
| return null; | ||
| } | ||
| const requestId = crypto.randomUUID(); | ||
| const timestamp = new Date().toISOString(); | ||
|
|
||
| return { | ||
| event_name: "NativeInferenceTokenUsageEvent", | ||
| external_customer_id: event.org_id, | ||
| timestamp, | ||
| idempotency_key: requestId, | ||
| properties: { | ||
| model: event.model, | ||
| resolved_model: event.resolved_model, | ||
| org_id: event.org_id, | ||
| input_tokens: event.input_tokens, | ||
| output_tokens: event.output_tokens, | ||
| cached_input_tokens: event.cached_input_tokens, | ||
| cache_write_input_tokens: event.cache_write_input_tokens, | ||
| }, | ||
| }; | ||
| } | ||
|
|
||
| export async function sendBillingTelemetryEvent({ | ||
| telemetryUrl, | ||
| event, | ||
| }: { | ||
| telemetryUrl?: string; | ||
| event: BillingEvent; | ||
| }): Promise<void> { | ||
| try { | ||
| const payloadEvent = buildPayloadEvent(event); | ||
| if (!payloadEvent) { | ||
| return; | ||
| } | ||
|
|
||
| const destination = telemetryUrl || DEFAULT_BILLING_TELEMETRY_URL; | ||
| const response = await fetch(destination, { | ||
| method: "POST", | ||
| headers: { | ||
| Authorization: `Bearer ${event.auth_token}`, | ||
| "Content-Type": "application/json", | ||
| }, | ||
| body: JSON.stringify({ | ||
| events: [payloadEvent], | ||
| }), | ||
| }); | ||
|
|
||
| if (!response.ok) { | ||
| const responseBody = await response.text(); | ||
| console.warn( | ||
| `billing event failed: ${response.status} ${response.statusText} ${responseBody}`, | ||
| ); | ||
| } | ||
| } catch (error) { | ||
| console.warn("billing event threw an error", error); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ import { BT_PARENT, resolveParentHeader } from "braintrust/util"; | |
| import { cachedLogin, makeProxySpanLogger } from "./tracing"; | ||
| import { MeterProvider } from "@opentelemetry/sdk-metrics"; | ||
| import { Meter, Attributes, Histogram } from "@opentelemetry/api"; | ||
| import { sendBillingTelemetryEvent } from "./billing"; | ||
|
|
||
| export type LogHistogramFn = (args: { | ||
| name: string; | ||
|
|
@@ -117,6 +118,30 @@ export async function handleProxyV1( | |
| let span: Span | undefined; | ||
| let spanId: string | undefined; | ||
| let spanExport: string | undefined; | ||
| let billingOrgId: string | undefined; | ||
| const orgName = request.headers.get(ORG_NAME_HEADER) ?? undefined; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Billing org lookup is keyed only by Useful? React with 👍 / 👎. |
||
| const apiKey = | ||
| parseAuthHeader({ | ||
| authorization: request.headers.get("authorization") ?? undefined, | ||
| }) ?? undefined; | ||
|
Comment on lines
+123
to
+126
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This API key extraction only reads Useful? React with 👍 / 👎. |
||
|
|
||
| const getLoginState = async () => | ||
| cachedLogin({ | ||
| appUrl: braintrustAppUrl(env).toString(), | ||
| apiKey, | ||
| orgName, | ||
| cache: credentialsCache, | ||
| }); | ||
|
|
||
| if (apiKey) { | ||
| try { | ||
| const loginState = await getLoginState(); | ||
| billingOrgId = loginState.orgId ?? undefined; | ||
| } catch (error) { | ||
| console.warn("Failed to resolve billing org id", error); | ||
| } | ||
| } | ||
|
|
||
| const parentHeader = request.headers.get(BT_PARENT); | ||
| if (parentHeader) { | ||
| let parent; | ||
|
|
@@ -131,19 +156,11 @@ export async function handleProxyV1( | |
| ); | ||
| } | ||
|
|
||
| const orgName = request.headers.get(ORG_NAME_HEADER) ?? undefined; | ||
| const apiKey = | ||
| parseAuthHeader({ | ||
| authorization: request.headers.get("authorization") ?? undefined, | ||
| }) ?? undefined; | ||
| const loginState = await getLoginState(); | ||
| billingOrgId = loginState.orgId ?? undefined; | ||
|
|
||
| span = startSpan({ | ||
| state: await cachedLogin({ | ||
| appUrl: braintrustAppUrl(env).toString(), | ||
| apiKey, | ||
| orgName, | ||
| cache: credentialsCache, | ||
| }), | ||
| state: loginState, | ||
| type: "llm", | ||
| name: "LLM", | ||
| parent: parent.toStr(), | ||
|
|
@@ -199,6 +216,17 @@ export async function handleProxyV1( | |
| spanLogger, | ||
| spanId, | ||
| spanExport, | ||
| billingOrgId, | ||
| onBillingEvent: (event) => { | ||
| ctx.waitUntil( | ||
| sendBillingTelemetryEvent({ | ||
| telemetryUrl: env.BILLING_TELEMETRY_URL, | ||
| event, | ||
| }).catch((error) => { | ||
| console.warn("billing waitUntil task failed", error); | ||
| }), | ||
| ); | ||
| }, | ||
| nativeInferenceSecretKey: env.NATIVE_INFERENCE_SECRET_KEY, | ||
| }; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,10 +28,12 @@ head_sampling_rate = 0.2 | |
| # You should not need to edit this | ||
| BRAINTRUST_APP_URL = "https://www.braintrust.dev" | ||
| METRICS_LICENSE_KEY="<YOUR_METRICS_LICENSE_KEY>" | ||
| BILLING_TELEMETRY_URL="https://api.braintrust.dev/billing/telemetry/ingest" | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this how you configure the proxy? |
||
|
|
||
| [env.staging.vars] | ||
| BRAINTRUST_APP_URL = "https://www.braintrust.dev" | ||
| METRICS_LICENSE_KEY="<YOUR_METRICS_LICENSE_KEY>" | ||
| BILLING_TELEMETRY_URL="https://api.braintrust.dev/billing/telemetry/ingest" | ||
|
|
||
| [env.staging] | ||
| kv_namespaces = [ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added this guard after testing with gpt-5-mini.