
Commit 9f622ee

Merge branch 'tensorzero:main' into main
2 parents: 7452a9f + 851c1f1

File tree: 8 files changed, +229 −5 lines


.github/workflows/merge-queue.yml

Lines changed: 1 addition & 0 deletions

@@ -275,6 +275,7 @@ jobs:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
       FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

   # See 'ci/README.md' at the repository root for more details.
   check-all-live-tests-passed:

.github/workflows/slash-command-regen-fixtures.yml

Lines changed: 2 additions & 0 deletions

@@ -9,6 +9,7 @@ env:
   OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
   AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
   AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

 permissions:
   pull-requests: write # For doing the emoji reaction on a PR comment
@@ -42,6 +43,7 @@ jobs:
   echo "FIREWORKS_API_KEY=not_used" >> ui/fixtures/.env
   echo "FIREWORKS_BASE_URL=http://mock-inference-provider:3030/fireworks/" >> ui/fixtures/.env
   echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ui/fixtures/.env
+  echo "ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }}" >> ui/fixtures/.env
   echo "OPENAI_BASE_URL=http://mock-inference-provider:3030/openai/" >> ui/fixtures/.env
   echo "FIREWORKS_BASE_URL=http://mock-inference-provider:3030/fireworks/" >> ui/fixtures/.env
   echo "S3_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }}" >> ui/fixtures/.env

.github/workflows/ui-tests-e2e-model-inference-cache.yml

Lines changed: 4 additions & 0 deletions

@@ -13,6 +13,8 @@ on:
         required: false
       FIREWORKS_API_KEY:
         required: false
+      ANTHROPIC_API_KEY:
+        required: false
     inputs:
       regen_cache:
         required: true
@@ -95,6 +97,7 @@ jobs:
   echo "FIREWORKS_ACCOUNT_ID=${{ secrets.FIREWORKS_ACCOUNT_ID }}" >> fixtures/.env-gateway
   echo "FIREWORKS_API_KEY=${{ secrets.FIREWORKS_API_KEY }}" >> fixtures/.env-gateway
   echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> fixtures/.env-gateway
+  echo "ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }}" >> fixtures/.env-gateway
   echo "S3_ACCESS_KEY_ID=${{ secrets.S3_ACCESS_KEY_ID }}" >> fixtures/.env-gateway
   echo "S3_SECRET_ACCESS_KEY=${{ secrets.S3_SECRET_ACCESS_KEY }}" >> fixtures/.env-gateway
   ./fixtures/regenerate-model-inference-cache.sh
@@ -124,6 +127,7 @@ jobs:
   echo "FIREWORKS_ACCOUNT_ID=${{ secrets.FIREWORKS_ACCOUNT_ID || 'not_used' }}" >> fixtures/.env-gateway
   echo "FIREWORKS_API_KEY=${{ secrets.FIREWORKS_API_KEY || 'not_used' }}" >> fixtures/.env-gateway
   echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY || 'not_used' }}" >> fixtures/.env-gateway
+  echo "ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY || 'not_used' }}" >> fixtures/.env-gateway
   echo "S3_ACCESS_KEY_ID=${{ secrets.S3_ACCESS_KEY_ID }}" >> fixtures/.env-gateway
   echo "S3_SECRET_ACCESS_KEY=${{ secrets.S3_SECRET_ACCESS_KEY }}" >> fixtures/.env-gateway
   docker compose -f fixtures/docker-compose.e2e.yml up --no-build -d

.github/workflows/ui-tests-e2e.yml

Lines changed: 1 addition & 0 deletions

@@ -113,6 +113,7 @@ jobs:
   # The 'ui-tests-e2e' job tests that the UI container starts without some of these variables set,
   echo "FIREWORKS_ACCOUNT_ID=fake_fireworks_account" >> fixtures/.env
   echo "FIREWORKS_API_KEY=not_used" >> fixtures/.env
+  echo "ANTHROPIC_API_KEY=not_used" >> fixtures/.env
   echo "FIREWORKS_BASE_URL=http://mock-inference-provider:3030/fireworks/" >> fixtures/.env
   echo "OPENAI_API_KEY=not_used" >> fixtures/.env
   echo "OPENAI_BASE_URL=http://mock-inference-provider:3030/openai/" >> fixtures/.env

tensorzero-core/src/endpoints/inference.rs

Lines changed: 4 additions & 1 deletion

@@ -239,7 +239,10 @@ pub async fn inference(
         &clickhouse_connection_info,
     )
     .await?;
-    tracing::Span::current().record("episode_id", episode_id.to_string());
+    // Record the episode id if we didn't already have one
+    if params.episode_id.is_none() {
+        tracing::Span::current().record("episode_id", episode_id.to_string());
+    }

     let (function, function_name) = find_function(&params, &config)?;
     let mut candidate_variants: BTreeMap<String, Arc<VariantInfo>> =
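Note: the change above keeps a caller-supplied episode id from being overwritten; the span's episode_id field is only recorded here when the request did not include one. A minimal standalone sketch of the pattern, assuming the handler's span declares an empty episode_id field (the gateway's actual #[instrument] wiring is outside this diff):

    use tracing::{field, instrument, Span};
    use uuid::Uuid;

    // Sketch: declare `episode_id` as an empty span field, then fill it in
    // only when the caller did not provide one (uuid needs the "v7" feature).
    #[instrument(skip_all, fields(episode_id = field::Empty))]
    fn handle(caller_episode_id: Option<Uuid>) {
        let episode_id = caller_episode_id.unwrap_or_else(Uuid::now_v7);
        if caller_episode_id.is_none() {
            Span::current().record("episode_id", episode_id.to_string());
        }
        // ... handle the request using `episode_id` ...
    }

    fn main() {
        tracing_subscriber::fmt().init();
        handle(None); // a generated id is recorded on the span
        handle(Some(Uuid::now_v7())); // a caller id is assumed already recorded
    }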

tensorzero-core/src/observability.rs

Lines changed: 7 additions & 3 deletions

@@ -93,9 +93,13 @@ pub fn build_opentelemetry_layer<T: SpanExporter + 'static>(
     // We only expose spans that explicitly contain a field prefixed with "http." or "otel."
     // For example, `#[instrument(fields(otel.name = "my_otel_name"))]` will be exported
     let filter = filter::filter_fn(|metadata| {
-        metadata.fields().iter().any(|field| {
-            field.name().starts_with("http.") || field.name().starts_with("otel.")
-        })
+        if metadata.is_event() {
+            matches!(metadata.level(), &tracing::Level::ERROR)
+        } else {
+            metadata.fields().iter().any(|field| {
+                field.name().starts_with("http.") || field.name().starts_with("otel.")
+            })
+        }
     });

     reload_handle
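Note: the effect of the new branch is that ERROR-level events now pass the export filter, so they can be attached to exported spans as OpenTelemetry span events (the new test in tensorzero-core/tests/e2e/otel.rs below asserts exactly this), while spans themselves are still exported only when they carry an "http."- or "otel."-prefixed field. A self-contained sketch of the same filter logic; the fmt layer here stands in for the gateway's OTLP layer, which is wired up elsewhere in build_opentelemetry_layer:

    use tracing_subscriber::{filter, prelude::*};

    fn main() {
        // Same predicate as the diff above: keep ERROR events, and keep spans
        // that declare a field prefixed with "http." or "otel.".
        let export_filter = filter::filter_fn(|metadata| {
            if metadata.is_event() {
                matches!(metadata.level(), &tracing::Level::ERROR)
            } else {
                metadata.fields().iter().any(|field| {
                    field.name().starts_with("http.") || field.name().starts_with("otel.")
                })
            }
        });

        tracing_subscriber::registry()
            .with(tracing_subscriber::fmt::layer().with_filter(export_filter))
            .init();

        let span = tracing::info_span!("model_inference", otel.name = "model_inference");
        let _guard = span.enter();
        tracing::error!("Error from openai server: model not found"); // now kept
        tracing::info!("request details"); // still filtered out
    }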

tensorzero-core/tests/e2e/otel.rs

Lines changed: 139 additions & 1 deletion

@@ -3,7 +3,7 @@ use std::{
     sync::{Arc, Mutex},
 };

-use opentelemetry::{KeyValue, SpanId, Value};
+use opentelemetry::{trace::Status, KeyValue, SpanId, Value};
 use opentelemetry_sdk::{
     error::OTelSdkResult,
     trace::{SpanData, SpanExporter},
@@ -15,6 +15,7 @@ use tensorzero::{
 use tensorzero_core::inference::types::TextKind;
 use tensorzero_core::observability::build_opentelemetry_layer;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
+use uuid::Uuid;

 use crate::providers::common::make_embedded_gateway_no_config;

@@ -50,6 +51,7 @@
     }
 }

+#[derive(Debug)]
 pub struct SpanMap {
     pub root_spans: Vec<SpanData>,
     pub span_children: HashMap<SpanId, Vec<SpanData>>,
@@ -135,6 +137,7 @@ pub async fn test_capture_simple_inference_spans() {
     // Since we're using the embedded gateway, the root span will be `function_inference`
     // (we won't have a top-level HTTP span)
     assert_eq!(root_span.name, "function_inference");
+    assert_eq!(root_span.status, Status::Unset);
     let root_attr_map = attrs_to_map(&root_span.attributes);
     assert_eq!(root_attr_map["model_name"], "dummy::good".into());
     assert_eq!(
@@ -154,6 +157,7 @@
     };

     assert_eq!(variant_span.name, "variant_inference");
+    assert_eq!(variant_span.status, Status::Unset);
     let variant_attr_map = attrs_to_map(&variant_span.attributes);
     assert_eq!(
         variant_attr_map["function_name"],
@@ -168,6 +172,7 @@
     };

     assert_eq!(model_span.name, "model_inference");
+    assert_eq!(model_span.status, Status::Unset);
     let model_attr_map = attrs_to_map(&model_span.attributes);
     assert_eq!(model_attr_map["model_name"], "dummy::good".into());
     assert_eq!(model_attr_map["stream"], false.into());
@@ -177,6 +182,7 @@
         panic!("Expected one child span: {model_children:#?}");
     };
     assert_eq!(model_provider_span.name, "model_provider_inference");
+    assert_eq!(model_provider_span.status, Status::Unset);
     let model_provider_attr_map = attrs_to_map(&model_provider_span.attributes);
     assert_eq!(model_provider_attr_map["provider_name"], "dummy".into());
     assert_eq!(
@@ -199,3 +205,135 @@

     assert_eq!(num_spans, 4);
 }
+
+#[tokio::test]
+pub async fn test_capture_model_error() {
+    let episode_uuid = Uuid::now_v7();
+    let exporter = install_capturing_otel_exporter();
+
+    let client = make_embedded_gateway_no_config().await;
+    let _err = client
+        .inference(ClientInferenceParams {
+            episode_id: Some(episode_uuid),
+            model_name: Some("openai::missing-model-name".to_string()),
+            input: ClientInput {
+                system: None,
+                messages: vec![ClientInputMessage {
+                    role: Role::User,
+                    content: vec![ClientInputMessageContent::Text(TextKind::Text {
+                        text: "What is your name?".to_string(),
+                    })],
+                }],
+            },
+            ..Default::default()
+        })
+        .await
+        .unwrap_err();
+
+    let all_spans = exporter.take_spans();
+    let num_spans = all_spans.len();
+    let spans = build_span_map(all_spans);
+
+    let [root_span] = spans.root_spans.as_slice() else {
+        panic!("Expected one root span: {:#?}", spans.root_spans);
+    };
+    // Since we're using the embedded gateway, the root span will be `function_inference`
+    // (we won't have a top-level HTTP span)
+    assert_eq!(root_span.name, "function_inference");
+    assert_eq!(
+        root_span.status,
+        Status::Error {
+            description: "".into()
+        }
+    );
+    let root_attr_map = attrs_to_map(&root_span.attributes);
+    assert_eq!(
+        root_attr_map["model_name"],
+        "openai::missing-model-name".into()
+    );
+    assert_eq!(root_attr_map["episode_id"], episode_uuid.to_string().into());
+    assert_eq!(root_attr_map.get("function_name"), None);
+    assert_eq!(root_attr_map.get("variant_name"), None);
+
+    let root_children = &spans.span_children[&root_span.span_context.span_id()];
+    let [variant_span] = root_children.as_slice() else {
+        panic!("Expected one child span: {root_children:#?}");
+    };
+
+    assert_eq!(variant_span.name, "variant_inference");
+    assert_eq!(variant_span.status, Status::Unset);
+    let variant_attr_map = attrs_to_map(&variant_span.attributes);
+    assert_eq!(
+        variant_attr_map["function_name"],
+        "tensorzero::default".into()
+    );
+    assert_eq!(
+        variant_attr_map["variant_name"],
+        "openai::missing-model-name".into()
+    );
+    assert_eq!(variant_attr_map["stream"], false.into());
+
+    let variant_children = &spans.span_children[&variant_span.span_context.span_id()];
+    let [model_span] = variant_children.as_slice() else {
+        panic!("Expected one child span: {variant_children:#?}");
+    };
+
+    assert_eq!(model_span.name, "model_inference");
+    assert_eq!(
+        model_span.status,
+        Status::Error {
+            description: "".into()
+        }
+    );
+    let model_attr_map = attrs_to_map(&model_span.attributes);
+    assert_eq!(
+        model_attr_map["model_name"],
+        "openai::missing-model-name".into()
+    );
+    assert_eq!(model_attr_map["stream"], false.into());
+
+    let model_children = &spans.span_children[&model_span.span_context.span_id()];
+    let [model_provider_span] = model_children.as_slice() else {
+        panic!("Expected one child span: {model_children:#?}");
+    };
+    assert_eq!(model_provider_span.name, "model_provider_inference");
+    assert_eq!(
+        model_provider_span.status,
+        Status::Error {
+            description: "".into()
+        }
+    );
+    assert_eq!(
+        model_provider_span.events.len(),
+        1,
+        "Unexpected number of events: {model_provider_span:#?}",
+    );
+    assert!(
+        model_provider_span.events[0]
+            .name
+            .starts_with("Error from openai server:"),
+        "Unexpected span event: {:?}",
+        model_provider_span.events[0]
+    );
+    let model_provider_attr_map = attrs_to_map(&model_provider_span.attributes);
+    assert_eq!(model_provider_attr_map["provider_name"], "openai".into());
+    assert_eq!(
+        model_provider_attr_map["gen_ai.operation.name"],
+        "chat".into()
+    );
+    assert_eq!(model_provider_attr_map["gen_ai.system"], "openai".into());
+    assert_eq!(
+        model_provider_attr_map["gen_ai.request.model"],
+        "missing-model-name".into()
+    );
+    assert_eq!(model_attr_map["stream"], false.into());
+
+    assert_eq!(
+        spans
+            .span_children
+            .get(&model_provider_span.span_context.span_id()),
+        None
+    );
+
+    assert_eq!(num_spans, 4);
+}
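Note: the new test drives the embedded gateway at a nonexistent OpenAI model and checks that the failure is reflected in the captured spans: Status::Error on the function, model, and model-provider spans, an error span event on the provider span, and Status::Unset on the variant span. A minimal sketch of the capturing-exporter pattern it relies on; CapturingOtelExporter itself is outside this diff, and this assumes opentelemetry_sdk 0.28+, where SpanExporter::export takes &self and returns OTelSdkResult:

    use std::sync::{Arc, Mutex};

    use opentelemetry_sdk::error::OTelSdkResult;
    use opentelemetry_sdk::trace::{SpanData, SpanExporter};

    // Sketch: append every exported batch to shared memory so a test can
    // drain and assert on the spans after the traced work completes.
    #[derive(Clone, Debug, Default)]
    struct CapturingExporter {
        spans: Arc<Mutex<Vec<SpanData>>>,
    }

    impl SpanExporter for CapturingExporter {
        async fn export(&self, batch: Vec<SpanData>) -> OTelSdkResult {
            self.spans.lock().unwrap().extend(batch);
            Ok(())
        }
    }

    impl CapturingExporter {
        fn take_spans(&self) -> Vec<SpanData> {
            let mut guard = self.spans.lock().unwrap();
            std::mem::take(&mut *guard)
        }
    }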

ui/app/entry.server.tsx

Lines changed: 71 additions & 0 deletions

@@ -0,0 +1,71 @@
+import { PassThrough } from "node:stream";
+
+import type { AppLoadContext, EntryContext } from "react-router";
+import { createReadableStreamFromReadable } from "@react-router/node";
+import { ServerRouter } from "react-router";
+import { isbot } from "isbot";
+import type { RenderToPipeableStreamOptions } from "react-dom/server";
+import { renderToPipeableStream } from "react-dom/server";
+
+export const streamTimeout = 30_000;
+
+export default function handleRequest(
+  request: Request,
+  responseStatusCode: number,
+  responseHeaders: Headers,
+  routerContext: EntryContext,
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+  loadContext: AppLoadContext,
+  // If you have middleware enabled:
+  // loadContext: unstable_RouterContextProvider
+) {
+  return new Promise((resolve, reject) => {
+    let shellRendered = false;
+    const userAgent = request.headers.get("user-agent");
+
+    // Ensure requests from bots and SPA Mode renders wait for all content to load before responding
+    // https://react.dev/reference/react-dom/server/renderToPipeableStream#waiting-for-all-content-to-load-for-crawlers-and-static-generation
+    const readyOption: keyof RenderToPipeableStreamOptions =
+      (userAgent && isbot(userAgent)) || routerContext.isSpaMode
+        ? "onAllReady"
+        : "onShellReady";
+
+    const { pipe, abort } = renderToPipeableStream(
+      <ServerRouter context={routerContext} url={request.url} />,
+      {
+        [readyOption]() {
+          shellRendered = true;
+          const body = new PassThrough();
+          const stream = createReadableStreamFromReadable(body);
+
+          responseHeaders.set("Content-Type", "text/html");
+
+          resolve(
+            new Response(stream, {
+              headers: responseHeaders,
+              status: responseStatusCode,
+            }),
+          );
+
+          pipe(body);
+        },
+        onShellError(error: unknown) {
+          reject(error);
+        },
+        onError(error: unknown) {
+          responseStatusCode = 500;
+          // Log streaming rendering errors from inside the shell. Don't log
+          // errors encountered during initial shell rendering since they'll
+          // reject and get logged in handleDocumentRequest.
+          if (shellRendered) {
+            console.error(error);
+          }
+        },
+      },
+    );
+
+    // Abort the rendering stream after the `streamTimeout` so it has time to
+    // flush down the rejected boundaries
+    setTimeout(abort, streamTimeout + 1000);
+  });
+}
