Skip to content

Commit 84aecfb

Browse files
committed
Add more instrumentation (linera-io#4555)
Now that we have distributed tracing (after linera-io#4556), we need more instrumentation so we have data about more functions in the breakdowns. Instrument more functions with `telemetry_only` so that we don't get spammed in our logs, but the spans still get sent to Tempo. Tested this with linera-io#4556, saw the spans properly show in the breakdowns. - Nothing to do / These changes follow the usual release cycle.
1 parent 309828d commit 84aecfb

File tree

10 files changed

+365
-63
lines changed

10 files changed

+365
-63
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

linera-base/src/tracing.rs

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@ use std::{
1212

1313
use is_terminal::IsTerminal as _;
1414
use tracing::Subscriber;
15+
#[cfg(all(not(target_arch = "wasm32"), feature = "tempo"))]
16+
use tracing_subscriber::filter::{filter_fn, FilterExt as _};
1517
use tracing_subscriber::{
16-
filter::filter_fn,
1718
fmt::{
1819
self,
1920
format::{FmtSpan, Format, Full},
@@ -59,20 +60,33 @@ pub fn init(log_name: &str) {
5960
/// ## Span Filtering for Performance
6061
///
6162
/// By default, spans created by `#[instrument]` are logged to console AND sent
62-
/// to OpenTelemetry. To disable console output for performance-critical functions
63-
/// while keeping OpenTelemetry tracing, use:
63+
/// to OpenTelemetry. In order to not spam stderr, you can set a low level, and use the
64+
/// `telemetry_only` target:
6465
///
6566
/// ```rust
66-
/// use tracing::instrument;
67+
/// use tracing::{instrument, Level};
68+
///
69+
/// // Always sent to telemetry; console output controlled by level
70+
/// #[instrument(level = "trace", target = "telemetry_only")]
71+
/// fn my_called_too_frequently_function() {
72+
/// // Will be sent to OpenTelemetry regardless of RUST_LOG level
73+
/// // Will only appear in console if RUST_LOG includes trace level
74+
/// }
6775
///
68-
/// #[instrument(target = "telemetry_only")]
69-
/// fn my_performance_critical_function() {
70-
/// // This span will ONLY be sent to OpenTelemetry, not logged to console
76+
/// // Higher level - more likely to appear in console
77+
/// #[instrument(level = "info", target = "telemetry_only")]
78+
/// fn my_important_function() {
79+
/// // Will be sent to OpenTelemetry regardless of RUST_LOG level
80+
/// // Will appear in console if RUST_LOG includes info level or higher
7181
/// }
7282
/// ```
7383
///
74-
/// All explicit log calls (tracing::info!(), etc.) are always printed regardless
75-
/// of span filtering.
84+
/// **Key behaviors:**
85+
/// - If span level >= RUST_LOG level: span goes to BOTH telemetry AND console (regardless of target)
86+
/// - If span level < RUST_LOG level AND target = "telemetry_only": span goes to telemetry ONLY
87+
/// - If span level < RUST_LOG level AND target != "telemetry_only": span is filtered out completely
88+
/// - Default level for `telemetry_only` should be `trace` for minimal console noise
89+
/// - All explicit log calls (tracing::info!(), etc.) are always printed regardless of span filtering
7690
pub async fn init_with_opentelemetry(log_name: &str) {
7791
#[cfg(feature = "tempo")]
7892
{
@@ -102,28 +116,13 @@ fn init_internal(log_name: &str, with_opentelemetry: bool) {
102116
let color_output =
103117
!std::env::var("NO_COLOR").is_ok_and(|x| !x.is_empty()) && std::io::stderr().is_terminal();
104118

105-
// Create a filter that:
106-
// 1. Allows all explicit events (tracing::info!(), etc.)
107-
// 2. Allows spans from #[instrument] by default
108-
// 3. Blocks spans ONLY if they have target = "telemetry_only"
109-
let console_filter = filter_fn(|metadata| {
110-
if metadata.is_span() {
111-
// Block spans that explicitly request telemetry-only via target = "telemetry_only"
112-
metadata.target() != "telemetry_only"
113-
} else {
114-
// Always allow explicit log events (tracing::info!(), etc.)
115-
true
116-
}
117-
});
118-
119119
let stderr_layer = prepare_formatted_layer(
120120
format.as_deref(),
121121
fmt::layer()
122122
.with_span_events(span_events.clone())
123123
.with_writer(std::io::stderr)
124124
.with_ansi(color_output),
125-
)
126-
.with_filter(console_filter.clone());
125+
);
127126

128127
let maybe_log_file_layer = open_log_file(log_name).map(|file_writer| {
129128
prepare_formatted_layer(
@@ -133,7 +132,6 @@ fn init_internal(log_name: &str, with_opentelemetry: bool) {
133132
.with_writer(Arc::new(file_writer))
134133
.with_ansi(false),
135134
)
136-
.with_filter(console_filter.clone())
137135
});
138136

139137
#[cfg(any(target_arch = "wasm32", not(feature = "tempo")))]
@@ -173,7 +171,18 @@ fn init_internal(log_name: &str, with_opentelemetry: bool) {
173171
global::set_tracer_provider(tracer_provider.clone());
174172

175173
let tracer = tracer_provider.tracer("linera");
176-
let opentelemetry_layer = OpenTelemetryLayer::new(tracer);
174+
175+
let telemetry_only_filter =
176+
filter_fn(|metadata| metadata.is_span() && metadata.target() == "telemetry_only");
177+
178+
let otel_env_filter = tracing_subscriber::EnvFilter::builder()
179+
.with_default_directive(tracing_subscriber::filter::LevelFilter::INFO.into())
180+
.from_env_lossy();
181+
182+
let opentelemetry_filter = otel_env_filter.or(telemetry_only_filter);
183+
184+
let opentelemetry_layer =
185+
OpenTelemetryLayer::new(tracer).with_filter(opentelemetry_filter);
177186

178187
tracing_subscriber::registry()
179188
.with(env_filter)

linera-chain/src/chain.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use linera_views::{
3535
views::{ClonableView, CryptoHashView, RootView, View},
3636
};
3737
use serde::{Deserialize, Serialize};
38+
use tracing::instrument;
3839

3940
use crate::{
4041
block::{Block, ConfirmedBlock},
@@ -388,6 +389,9 @@ where
388389
self.context().extra().chain_id()
389390
}
390391

392+
#[instrument(target = "telemetry_only", skip_all, fields(
393+
chain_id = %self.chain_id(),
394+
))]
391395
pub async fn query_application(
392396
&mut self,
393397
local_time: Timestamp,
@@ -405,6 +409,10 @@ where
405409
.with_execution_context(ChainExecutionContext::Query)
406410
}
407411

412+
#[instrument(target = "telemetry_only", skip_all, fields(
413+
chain_id = %self.chain_id(),
414+
application_id = %application_id
415+
))]
408416
pub async fn describe_application(
409417
&mut self,
410418
application_id: ApplicationId,
@@ -416,6 +424,11 @@ where
416424
.with_execution_context(ChainExecutionContext::DescribeApplication)
417425
}
418426

427+
#[instrument(target = "telemetry_only", skip_all, fields(
428+
chain_id = %self.chain_id(),
429+
target = %target,
430+
height = %height
431+
))]
419432
pub async fn mark_messages_as_received(
420433
&mut self,
421434
target: &ChainId,
@@ -503,6 +516,9 @@ where
503516

504517
/// Verifies that this chain is up-to-date and all the messages executed ahead of time
505518
/// have been properly received by now.
519+
#[instrument(target = "telemetry_only", skip_all, fields(
520+
chain_id = %self.chain_id()
521+
))]
506522
pub async fn validate_incoming_bundles(&self) -> Result<(), ChainError> {
507523
let chain_id = self.chain_id();
508524
let pairs = self.inboxes.try_load_all_entries().await?;
@@ -564,6 +580,11 @@ where
564580
/// round timeouts.
565581
///
566582
/// Returns `true` if incoming `Subscribe` messages created new outbox entries.
583+
#[instrument(target = "telemetry_only", skip_all, fields(
584+
chain_id = %self.chain_id(),
585+
origin = %origin,
586+
bundle_height = %bundle.height
587+
))]
567588
pub async fn receive_message_bundle(
568589
&mut self,
569590
origin: &ChainId,
@@ -658,6 +679,9 @@ where
658679
}
659680

660681
/// Removes the incoming message bundles in the block from the inboxes.
682+
#[instrument(target = "telemetry_only", skip_all, fields(
683+
chain_id = %self.chain_id(),
684+
))]
661685
pub async fn remove_bundles_from_inboxes(
662686
&mut self,
663687
timestamp: Timestamp,
@@ -747,6 +771,10 @@ where
747771

748772
/// Executes a block: first the incoming messages, then the main operation.
749773
/// Does not update chain state other than the execution state.
774+
#[instrument(target = "telemetry_only", skip_all, fields(
775+
chain_id = %block.chain_id,
776+
block_height = %block.height
777+
))]
750778
#[expect(clippy::too_many_arguments)]
751779
async fn execute_block_inner(
752780
chain: &mut ExecutionStateView<C>,
@@ -856,6 +884,10 @@ where
856884

857885
/// Executes a block: first the incoming messages, then the main operation.
858886
/// Does not update chain state other than the execution state.
887+
#[instrument(target = "telemetry_only", skip_all, fields(
888+
chain_id = %self.chain_id(),
889+
block_height = %block.height
890+
))]
859891
pub async fn execute_block(
860892
&mut self,
861893
block: &ProposedBlock,
@@ -915,6 +947,11 @@ where
915947

916948
/// Applies an execution outcome to the chain, updating the outboxes, state hash and chain
917949
/// manager. This does not touch the execution state itself, which must be updated separately.
950+
/// Returns the set of event streams that were updated as a result of applying the block.
951+
#[instrument(target = "telemetry_only", skip_all, fields(
952+
chain_id = %self.chain_id(),
953+
block_height = %block.inner().inner().header.height
954+
))]
918955
pub async fn apply_confirmed_block(
919956
&mut self,
920957
block: &ConfirmedBlock,
@@ -947,6 +984,10 @@ where
947984
}
948985

949986
/// Adds a block to `preprocessed_blocks`, and updates the outboxes where possible.
987+
#[instrument(target = "telemetry_only", skip_all, fields(
988+
chain_id = %self.chain_id(),
989+
block_height = %block.inner().inner().header.height
990+
))]
950991
pub async fn preprocess_block(&mut self, block: &ConfirmedBlock) -> Result<(), ChainError> {
951992
let hash = block.inner().hash();
952993
let block = block.inner().inner();
@@ -969,6 +1010,10 @@ where
9691010
}
9701011

9711012
/// Verifies that the block is valid according to the chain's application permission settings.
1013+
#[instrument(target = "telemetry_only", skip_all, fields(
1014+
block_height = %block.height,
1015+
num_transactions = %block.transactions.len()
1016+
))]
9721017
fn check_app_permissions(
9731018
app_permissions: &ApplicationPermissions,
9741019
block: &ProposedBlock,
@@ -1011,6 +1056,10 @@ where
10111056
}
10121057

10131058
/// Returns the hashes of all blocks we have in the given range.
1059+
#[instrument(target = "telemetry_only", skip_all, fields(
1060+
chain_id = %self.chain_id(),
1061+
next_block_height = %self.tip_state.get().next_block_height
1062+
))]
10141063
pub async fn block_hashes(
10151064
&self,
10161065
range: impl RangeBounds<BlockHeight>,
@@ -1056,6 +1105,10 @@ where
10561105
/// Updates the outboxes with the messages sent in the block.
10571106
///
10581107
/// Returns the set of all recipients.
1108+
#[instrument(target = "telemetry_only", skip_all, fields(
1109+
chain_id = %self.chain_id(),
1110+
block_height = %block.header.height
1111+
))]
10591112
async fn process_outgoing_messages(
10601113
&mut self,
10611114
block: &Block,

linera-core/src/chain_worker/actor.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ where
244244
/// Runs the worker until there are no more incoming requests.
245245
#[instrument(
246246
skip_all,
247-
fields(chain_id = format!("{:.8}", self.chain_id)),
247+
fields(chain_id = format!("{:.8}", self.chain_id), long_lived_services = %self.config.long_lived_services),
248248
)]
249249
async fn handle_requests(
250250
self,
@@ -276,9 +276,12 @@ where
276276
self.chain_id,
277277
service_runtime_endpoint,
278278
)
279+
.instrument(span.clone())
279280
.await?;
280281

281-
Box::pin(worker.handle_request(request).instrument(span)).await;
282+
Box::pin(worker.handle_request(request))
283+
.instrument(span)
284+
.await;
282285

283286
loop {
284287
futures::select! {
@@ -287,7 +290,7 @@ where
287290
let Some((request, span)) = maybe_request else {
288291
break; // Request sender was dropped.
289292
};
290-
Box::pin(worker.handle_request(request).instrument(span)).await;
293+
Box::pin(worker.handle_request(request)).instrument(span).await;
291294
}
292295
}
293296
}

0 commit comments

Comments
 (0)