Skip to content

Commit 14d29d5

Browse files
authored
Merge pull request #3176 from spinframework/select-error-spans
Selective error spans in variables and key-value
2 parents 67e3bf5 + f064001 commit 14d29d5

File tree

9 files changed

+94
-27
lines changed

9 files changed

+94
-27
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/factor-key-value/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ spin-core = { path = "../core" }
1111
spin-factors = { path = "../factors" }
1212
spin-locked-app = { path = "../locked-app" }
1313
spin-resource-table = { path = "../table" }
14+
spin-telemetry = { path = "../telemetry" }
1415
spin-world = { path = "../world" }
16+
thiserror = { workspace = true }
1517
tokio = { workspace = true, features = ["macros", "sync", "rt"] }
1618
toml = { workspace = true }
1719
tracing = { workspace = true }
18-
thiserror = { workspace = true }
1920

2021
[dev-dependencies]
2122
spin-factors-test = { path = "../factors-test" }

crates/factor-key-value/src/host.rs

Lines changed: 44 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@ use super::{Cas, SwapError};
22
use anyhow::{Context, Result};
33
use spin_core::{async_trait, wasmtime::component::Resource};
44
use spin_resource_table::Table;
5+
use spin_telemetry::traces::{self, Blame};
56
use spin_world::v2::key_value;
67
use spin_world::wasi::keyvalue as wasi_keyvalue;
78
use std::{collections::HashSet, sync::Arc};
8-
use tracing::{instrument, Level};
9+
use tracing::instrument;
910

1011
const DEFAULT_STORE_TABLE_CAPACITY: u32 = 256;
1112

@@ -69,7 +70,11 @@ impl KeyValueDispatch {
6970
}
7071

7172
pub fn get_store<T: 'static>(&self, store: Resource<T>) -> anyhow::Result<&Arc<dyn Store>> {
72-
self.stores.get(store.rep()).context("invalid store")
73+
let res = self.stores.get(store.rep()).context("invalid store");
74+
if let Err(err) = &res {
75+
traces::mark_as_error(err, Some(Blame::Host));
76+
}
77+
res
7378
}
7479

7580
pub fn get_cas<T: 'static>(&self, cas: Resource<T>) -> Result<&Arc<dyn Cas>> {
@@ -106,7 +111,7 @@ impl KeyValueDispatch {
106111
impl key_value::Host for KeyValueDispatch {}
107112

108113
impl key_value::HostStore for KeyValueDispatch {
109-
#[instrument(name = "spin_key_value.open", skip(self), err(level = Level::INFO), fields(otel.kind = "client", kv.backend=self.manager.summary(&name).unwrap_or("unknown".to_string())))]
114+
#[instrument(name = "spin_key_value.open", skip(self), err, fields(otel.kind = "client", kv.backend=self.manager.summary(&name).unwrap_or("unknown".to_string())))]
110115
async fn open(&mut self, name: String) -> Result<Result<Resource<key_value::Store>, Error>> {
111116
Ok(async {
112117
if self.allowed_stores.contains(&name) {
@@ -124,54 +129,54 @@ impl key_value::HostStore for KeyValueDispatch {
124129
.await)
125130
}
126131

127-
#[instrument(name = "spin_key_value.get", skip(self, store, key), err(level = Level::INFO), fields(otel.kind = "client"))]
132+
#[instrument(name = "spin_key_value.get", skip_all, fields(otel.kind = "client"))]
128133
async fn get(
129134
&mut self,
130135
store: Resource<key_value::Store>,
131136
key: String,
132137
) -> Result<Result<Option<Vec<u8>>, Error>> {
133138
let store = self.get_store(store)?;
134-
Ok(store.get(&key).await)
139+
Ok(store.get(&key).await.map_err(track_error_on_span))
135140
}
136141

137-
#[instrument(name = "spin_key_value.set", skip(self, store, key, value), err(level = Level::INFO), fields(otel.kind = "client"))]
142+
#[instrument(name = "spin_key_value.set", skip_all, fields(otel.kind = "client"))]
138143
async fn set(
139144
&mut self,
140145
store: Resource<key_value::Store>,
141146
key: String,
142147
value: Vec<u8>,
143148
) -> Result<Result<(), Error>> {
144149
let store = self.get_store(store)?;
145-
Ok(store.set(&key, &value).await)
150+
Ok(store.set(&key, &value).await.map_err(track_error_on_span))
146151
}
147152

148-
#[instrument(name = "spin_key_value.delete", skip(self, store, key), err(level = Level::INFO), fields(otel.kind = "client"))]
153+
#[instrument(name = "spin_key_value.delete", skip_all, fields(otel.kind = "client"))]
149154
async fn delete(
150155
&mut self,
151156
store: Resource<key_value::Store>,
152157
key: String,
153158
) -> Result<Result<(), Error>> {
154159
let store = self.get_store(store)?;
155-
Ok(store.delete(&key).await)
160+
Ok(store.delete(&key).await.map_err(track_error_on_span))
156161
}
157162

158-
#[instrument(name = "spin_key_value.exists", skip(self, store, key), err(level = Level::INFO), fields(otel.kind = "client"))]
163+
#[instrument(name = "spin_key_value.exists", skip_all, fields(otel.kind = "client"))]
159164
async fn exists(
160165
&mut self,
161166
store: Resource<key_value::Store>,
162167
key: String,
163168
) -> Result<Result<bool, Error>> {
164169
let store = self.get_store(store)?;
165-
Ok(store.exists(&key).await)
170+
Ok(store.exists(&key).await.map_err(track_error_on_span))
166171
}
167172

168-
#[instrument(name = "spin_key_value.get_keys", skip(self, store), err(level = Level::INFO), fields(otel.kind = "client"))]
173+
#[instrument(name = "spin_key_value.get_keys", skip_all, fields(otel.kind = "client"))]
169174
async fn get_keys(
170175
&mut self,
171176
store: Resource<key_value::Store>,
172177
) -> Result<Result<Vec<String>, Error>> {
173178
let store = self.get_store(store)?;
174-
Ok(store.get_keys().await)
179+
Ok(store.get_keys().await.map_err(track_error_on_span))
175180
}
176181

177182
async fn drop(&mut self, store: Resource<key_value::Store>) -> Result<()> {
@@ -180,8 +185,18 @@ impl key_value::HostStore for KeyValueDispatch {
180185
}
181186
}
182187

188+
/// Make sure that infrastructure related errors are tracked in the current span.
189+
fn track_error_on_span(err: Error) -> Error {
190+
let blame = match err {
191+
Error::NoSuchStore | Error::AccessDenied => Blame::Guest,
192+
Error::StoreTableFull | Error::Other(_) => Blame::Host,
193+
};
194+
traces::mark_as_error(&err, Some(blame));
195+
err
196+
}
197+
183198
fn to_wasi_err(e: Error) -> wasi_keyvalue::store::Error {
184-
match e {
199+
match track_error_on_span(e) {
185200
Error::AccessDenied => wasi_keyvalue::store::Error::AccessDenied,
186201
Error::NoSuchStore => wasi_keyvalue::store::Error::NoSuchStore,
187202
Error::StoreTableFull => wasi_keyvalue::store::Error::Other("store table full".to_string()),
@@ -190,6 +205,7 @@ fn to_wasi_err(e: Error) -> wasi_keyvalue::store::Error {
190205
}
191206

192207
impl wasi_keyvalue::store::Host for KeyValueDispatch {
208+
#[instrument(name = "wasi_key_value.open", skip_all, fields(otel.kind = "client"))]
193209
async fn open(
194210
&mut self,
195211
identifier: String,
@@ -217,6 +233,7 @@ impl wasi_keyvalue::store::Host for KeyValueDispatch {
217233

218234
use wasi_keyvalue::store::Bucket;
219235
impl wasi_keyvalue::store::HostBucket for KeyValueDispatch {
236+
#[instrument(name = "wasi_key_value.get", skip_all, fields(otel.kind = "client"))]
220237
async fn get(
221238
&mut self,
222239
self_: Resource<Bucket>,
@@ -226,6 +243,7 @@ impl wasi_keyvalue::store::HostBucket for KeyValueDispatch {
226243
store.get(&key).await.map_err(to_wasi_err)
227244
}
228245

246+
#[instrument(name = "wasi_key_value.set", skip_all, fields(otel.kind = "client"))]
229247
async fn set(
230248
&mut self,
231249
self_: Resource<Bucket>,
@@ -236,6 +254,7 @@ impl wasi_keyvalue::store::HostBucket for KeyValueDispatch {
236254
store.set(&key, &value).await.map_err(to_wasi_err)
237255
}
238256

257+
#[instrument(name = "wasi_key_value.delete", skip_all, fields(otel.kind = "client"))]
239258
async fn delete(
240259
&mut self,
241260
self_: Resource<Bucket>,
@@ -245,6 +264,7 @@ impl wasi_keyvalue::store::HostBucket for KeyValueDispatch {
245264
store.delete(&key).await.map_err(to_wasi_err)
246265
}
247266

267+
#[instrument(name = "wasi_key_value.exists", skip_all, fields(otel.kind = "client"))]
248268
async fn exists(
249269
&mut self,
250270
self_: Resource<Bucket>,
@@ -254,6 +274,7 @@ impl wasi_keyvalue::store::HostBucket for KeyValueDispatch {
254274
store.exists(&key).await.map_err(to_wasi_err)
255275
}
256276

277+
#[instrument(name = "wasi_key_value.list_keys", skip_all, fields(otel.kind = "client"))]
257278
async fn list_keys(
258279
&mut self,
259280
self_: Resource<Bucket>,
@@ -278,7 +299,7 @@ impl wasi_keyvalue::store::HostBucket for KeyValueDispatch {
278299
}
279300

280301
impl wasi_keyvalue::batch::Host for KeyValueDispatch {
281-
#[instrument(name = "spin_key_value.get_many", skip(self, bucket, keys), err(level = Level::INFO), fields(otel.kind = "client"))]
302+
#[instrument(name = "spin_key_value.get_many", skip_all, fields(otel.kind = "client"))]
282303
#[allow(clippy::type_complexity)]
283304
async fn get_many(
284305
&mut self,
@@ -292,7 +313,7 @@ impl wasi_keyvalue::batch::Host for KeyValueDispatch {
292313
store.get_many(keys).await.map_err(to_wasi_err)
293314
}
294315

295-
#[instrument(name = "spin_key_value.set_many", skip(self, bucket, key_values), err(level = Level::INFO), fields(otel.kind = "client"))]
316+
#[instrument(name = "spin_key_value.set_many", skip_all, fields(otel.kind = "client"))]
296317
async fn set_many(
297318
&mut self,
298319
bucket: Resource<wasi_keyvalue::batch::Bucket>,
@@ -305,7 +326,7 @@ impl wasi_keyvalue::batch::Host for KeyValueDispatch {
305326
store.set_many(key_values).await.map_err(to_wasi_err)
306327
}
307328

308-
#[instrument(name = "spin_key_value.delete_many", skip(self, bucket, keys), err(level = Level::INFO), fields(otel.kind = "client"))]
329+
#[instrument(name = "spin_key_value.delete_many", skip_all, fields(otel.kind = "client"))]
309330
async fn delete_many(
310331
&mut self,
311332
bucket: Resource<wasi_keyvalue::batch::Bucket>,
@@ -320,6 +341,7 @@ impl wasi_keyvalue::batch::Host for KeyValueDispatch {
320341
}
321342

322343
impl wasi_keyvalue::atomics::HostCas for KeyValueDispatch {
344+
#[instrument(name = "wasi_key_value_cas.new", skip_all, fields(otel.kind = "client"))]
323345
async fn new(
324346
&mut self,
325347
bucket: Resource<wasi_keyvalue::atomics::Bucket>,
@@ -342,6 +364,7 @@ impl wasi_keyvalue::atomics::HostCas for KeyValueDispatch {
342364
.map(Resource::new_own)
343365
}
344366

367+
#[instrument(name = "wasi_key_value_cas.current", skip_all, fields(otel.kind = "client"))]
345368
async fn current(
346369
&mut self,
347370
cas: Resource<wasi_keyvalue::atomics::Cas>,
@@ -366,7 +389,7 @@ impl wasi_keyvalue::atomics::Host for KeyValueDispatch {
366389
Ok(error)
367390
}
368391

369-
#[instrument(name = "spin_key_value.increment", skip(self, bucket, key, delta), err(level = Level::INFO), fields(otel.kind = "client"))]
392+
#[instrument(name = "spin_key_value.increment", skip_all, fields(otel.kind = "client"))]
370393
async fn increment(
371394
&mut self,
372395
bucket: Resource<wasi_keyvalue::atomics::Bucket>,
@@ -377,7 +400,7 @@ impl wasi_keyvalue::atomics::Host for KeyValueDispatch {
377400
store.increment(key, delta).await.map_err(to_wasi_err)
378401
}
379402

380-
#[instrument(name = "spin_key_value.swap", skip(self, cas_res, value), err(level = Level::INFO), fields(otel.kind = "client"))]
403+
#[instrument(name = "spin_key_value.swap", skip_all, fields(otel.kind = "client"))]
381404
async fn swap(
382405
&mut self,
383406
cas_res: Resource<atomics::Cas>,
@@ -424,8 +447,8 @@ use spin_world::v1::key_value::Error as LegacyError;
424447
use spin_world::wasi::keyvalue::atomics;
425448
use spin_world::wasi::keyvalue::atomics::{CasError, HostCas};
426449

427-
fn to_legacy_error(value: key_value::Error) -> LegacyError {
428-
match value {
450+
fn to_legacy_error(err: Error) -> LegacyError {
451+
match err {
429452
Error::StoreTableFull => LegacyError::StoreTableFull,
430453
Error::NoSuchStore => LegacyError::NoSuchStore,
431454
Error::AccessDenied => LegacyError::AccessDenied,

crates/factor-variables/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = { workspace = true }
77
[dependencies]
88
spin-expressions = { path = "../expressions" }
99
spin-factors = { path = "../factors" }
10+
spin-telemetry = { path = "../telemetry" }
1011
spin-world = { path = "../world" }
1112
tracing = { workspace = true }
1213

crates/factor-variables/src/host.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
use spin_factors::anyhow;
2+
use spin_telemetry::traces::{self, Blame};
23
use spin_world::{v1, v2::variables, wasi::config as wasi_config};
3-
use tracing::{instrument, Level};
4+
use tracing::instrument;
45

56
use crate::InstanceState;
67

78
impl variables::Host for InstanceState {
8-
#[instrument(name = "spin_variables.get", skip(self), err(level = Level::INFO), fields(otel.kind = "client"))]
9+
#[instrument(name = "spin_variables.get", skip(self), fields(otel.kind = "client"))]
910
async fn get(&mut self, key: String) -> Result<String, variables::Error> {
1011
let key = spin_expressions::Key::new(&key).map_err(expressions_to_variables_err)?;
1112
self.expression_resolver
@@ -20,6 +21,7 @@ impl variables::Host for InstanceState {
2021
}
2122

2223
impl v1::config::Host for InstanceState {
24+
#[instrument(name = "spin_config.get", skip(self), fields(otel.kind = "client"))]
2325
async fn get_config(&mut self, key: String) -> Result<String, v1::config::Error> {
2426
<Self as variables::Host>::get(self, key)
2527
.await
@@ -36,6 +38,7 @@ impl v1::config::Host for InstanceState {
3638
}
3739

3840
impl wasi_config::store::Host for InstanceState {
41+
#[instrument(name = "wasi_config.get", skip(self), fields(otel.kind = "client"))]
3942
async fn get(&mut self, key: String) -> Result<Option<String>, wasi_config::store::Error> {
4043
match <Self as variables::Host>::get(self, key).await {
4144
Ok(value) => Ok(Some(value)),
@@ -46,6 +49,7 @@ impl wasi_config::store::Host for InstanceState {
4649
}
4750
}
4851

52+
#[instrument(name = "wasi_config.get_all", skip(self), fields(otel.kind = "client"))]
4953
async fn get_all(&mut self) -> Result<Vec<(String, String)>, wasi_config::store::Error> {
5054
let all = self
5155
.expression_resolver
@@ -69,12 +73,18 @@ impl wasi_config::store::Host for InstanceState {
6973
}
7074
}
7175

76+
/// Convert a `spin_expressions::Error` to a `variables::Error`, setting the current span's status and fault attribute.
7277
fn expressions_to_variables_err(err: spin_expressions::Error) -> variables::Error {
7378
use spin_expressions::Error;
79+
let blame = match err {
80+
Error::InvalidName(_) | Error::InvalidTemplate(_) | Error::Undefined(_) => Blame::Guest,
81+
Error::Provider(_) => Blame::Host,
82+
};
83+
traces::mark_as_error(&err, Some(blame));
7484
match err {
7585
Error::InvalidName(msg) => variables::Error::InvalidName(msg),
7686
Error::Undefined(msg) => variables::Error::Undefined(msg),
87+
Error::InvalidTemplate(_) => variables::Error::Other(format!("{err}")),
7788
Error::Provider(err) => variables::Error::Provider(err.to_string()),
78-
other => variables::Error::Other(format!("{other}")),
7989
}
8090
}

crates/telemetry/src/env.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ pub(crate) fn otel_sdk_disabled() -> bool {
7777
}
7878

7979
/// The protocol to use for OTLP exporter.
80+
#[derive(Debug)]
8081
pub(crate) enum OtlpProtocol {
8182
Grpc,
8283
HttpProtobuf,

crates/telemetry/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ mod env;
1313
pub mod logs;
1414
pub mod metrics;
1515
mod propagation;
16-
mod traces;
16+
pub mod traces;
1717

1818
#[cfg(feature = "testing")]
1919
pub mod testing;

crates/telemetry/src/traces.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use opentelemetry_sdk::{
55
Resource,
66
};
77
use tracing::Subscriber;
8+
use tracing_opentelemetry::OpenTelemetrySpanExt as _;
89
use tracing_subscriber::{registry::LookupSpan, EnvFilter, Layer};
910

1011
use crate::detector::SpinResourceDetector;
@@ -61,3 +62,32 @@ pub(crate) fn otel_tracing_layer<S: Subscriber + for<'span> LookupSpan<'span>>(
6162
.with_threads(false)
6263
.with_filter(env_filter))
6364
}
65+
66+
/// Indicate whether the error is more likely caused by the guest or the host.
67+
///
68+
/// This can be used to filter errors in telemetry or logging systems.
69+
#[derive(Debug)]
70+
pub enum Blame {
71+
/// The error is most likely caused by the guest.
72+
Guest,
73+
/// The error might have been caused by the host.
74+
Host,
75+
}
76+
77+
impl Blame {
78+
fn as_str(&self) -> &'static str {
79+
match self {
80+
Blame::Guest => "guest",
81+
Blame::Host => "host",
82+
}
83+
}
84+
}
85+
86+
/// Marks the current span as an error with the given error message and optional blame.
87+
pub fn mark_as_error<E: std::fmt::Display>(err: &E, blame: Option<Blame>) {
88+
let current_span = tracing::Span::current();
89+
current_span.set_status(opentelemetry::trace::Status::error(err.to_string()));
90+
if let Some(blame) = blame {
91+
current_span.set_attribute("error.blame", blame.as_str());
92+
}
93+
}

hack/o11y-stack/docker-compose.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
version: "3"
21
services:
32
otel-collector:
43
image: otel/opentelemetry-collector-contrib:0.98.0

0 commit comments

Comments
 (0)