Skip to content

Commit 5a4b9fd

Browse files
authored
chore(policy): polish policy controller logging (#13379)
There are a few things about the policy controller logging that can be cleaned up for consistency and clarity: * We frequently log ERROR messages when processing resources with unexpected values. These messages are more appropriately emitted at WARN--we want to surface these situations, but they are not really exceptional. * The leadership status of the status controller is not logged at INFO level, so it's not possible to know about status changes without DEBUG logging. * We generally use Sentence-cased log messages when emitting user-facing messages. There are a few situations where we are not consistent. * The status controller reconciliation logging is somewhat noisy and misleading. * The status controller does not log any messages when patching resources. ``` DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder has changed DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder has changed DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder has changed DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l ``` The "Lease holder has changed" message actually indicates that the _lease_ has changed, though the holder may be unchanged. To improve logging clarity, this change does the following: * Adds an INFO level log when the leadership status of the controller changes. * Adds an INFO level log when the status controller patches resources. * Adds DEBUG level logs when the status controller patches resources. * Reconciliation housekeeping logging is moved to TRACE level. * Consistently uses sentence capitalization in user-facing log messages * Reduces ERROR messages to WARN when handling invalid user-provided data (including cluster resources). This ensures that ERRORs are reserved for exceptional policy controller states.
1 parent 82c47a9 commit 5a4b9fd

File tree

17 files changed

+196
-205
lines changed

17 files changed

+196
-205
lines changed

policy-controller/grpc/src/inbound.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ fn to_server(srv: &InboundServer, cluster_networks: &[IpNet]) -> proto::Server {
145145
kind: match srv.protocol {
146146
ProxyProtocol::Detect { timeout } => Some(proto::proxy_protocol::Kind::Detect(
147147
proto::proxy_protocol::Detect {
148-
timeout: timeout.try_into().map_err(|error| tracing::warn!(%error, "failed to convert protocol detect timeout to protobuf")).ok(),
148+
timeout: timeout.try_into().map_err(|error| tracing::warn!(%error, "Failed to convert protocol detect timeout to protobuf")).ok(),
149149
http_routes: http::to_route_list(&srv.http_routes, cluster_networks),
150150
http_local_rate_limit: srv.ratelimit.as_ref().map(to_rate_limit),
151151
},

policy-controller/grpc/src/outbound.rs

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -531,40 +531,49 @@ fn default_backend(policy: &OutboundPolicy, original_dst: Option<SocketAddr>) ->
531531
},
532532
)),
533533
},
534+
534535
ParentInfo::EgressNetwork {
535536
namespace, name, ..
536-
} => match original_dst {
537-
Some(original_dst) => outbound::Backend {
538-
metadata: Some(Metadata {
539-
kind: Some(metadata::Kind::Resource(api::meta::Resource {
540-
group: "policy.linkerd.io".to_string(),
541-
kind: "EgressNetwork".to_string(),
542-
name,
543-
namespace,
544-
section: Default::default(),
545-
port: u16::from(policy.port).into(),
546-
})),
547-
}),
537+
} => {
538+
debug_assert!(
539+
original_dst.is_some(),
540+
"Must not serve EgressNetwork for named lookups; IP:PORT required"
541+
);
542+
let metadata = Some(Metadata {
543+
kind: Some(metadata::Kind::Resource(api::meta::Resource {
544+
group: "policy.linkerd.io".to_string(),
545+
kind: "EgressNetwork".to_string(),
546+
name,
547+
namespace,
548+
section: Default::default(),
549+
port: u16::from(policy.port).into(),
550+
})),
551+
});
552+
553+
let Some(addr) = original_dst else {
554+
tracing::error!(
555+
?metadata,
556+
"Unexpected state: EgressNetworks should only be returned when lookup is by IP:PORT; synthesizing invalid backend"
557+
);
558+
return outbound::Backend {
559+
metadata,
560+
queue: None,
561+
kind: None,
562+
};
563+
};
564+
565+
outbound::Backend {
566+
metadata,
548567
queue: Some(default_queue_config()),
549568
kind: Some(outbound::backend::Kind::Forward(
550569
destination::WeightedAddr {
551-
addr: Some(original_dst.into()),
570+
addr: Some(addr.into()),
552571
weight: 1,
553572
..Default::default()
554573
},
555574
)),
556-
},
557-
None => {
558-
tracing::error!("no original_dst for Egresspolicy");
559-
outbound::Backend {
560-
metadata: Some(Metadata {
561-
kind: Some(metadata::Kind::Default("invalid".to_string())),
562-
}),
563-
queue: None,
564-
kind: None,
565-
}
566575
}
567-
},
576+
}
568577
}
569578
}
570579

@@ -632,7 +641,7 @@ pub(crate) fn convert_duration(
632641
duration
633642
.try_into()
634643
.map_err(|error| {
635-
tracing::error!(%error, "Invalid {name} duration");
644+
tracing::warn!(%error, "Invalid {name} duration");
636645
})
637646
.ok()
638647
}

policy-controller/grpc/src/outbound/grpc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -454,11 +454,11 @@ fn convert_to_filter(filter: Filter) -> outbound::grpc_route::Filter {
454454
convert_request_header_modifier_filter(filter),
455455
)),
456456
Filter::RequestRedirect(filter) => {
457-
tracing::warn!(filter = ?filter, "declining to convert invalid filter type for GrpcRoute");
457+
tracing::warn!(filter = ?filter, "Declining to convert invalid filter type for GrpcRoute");
458458
None
459459
}
460460
Filter::ResponseHeaderModifier(filter) => {
461-
tracing::warn!(filter = ?filter, "declining to convert invalid filter type for GrpcRoute");
461+
tracing::warn!(filter = ?filter, "Declining to convert invalid filter type for GrpcRoute");
462462
None
463463
}
464464
},

policy-controller/grpc/src/workload.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ impl FromStr for Workload {
2323
fn from_str(s: &str) -> Result<Self, tonic::Status> {
2424
if s.starts_with('{') {
2525
return serde_json::from_str(s).map_err(|error| {
26-
tracing::error!(%error, "Invalid {s} workload string");
26+
tracing::warn!(%error, "Invalid {s} workload string");
2727
tonic::Status::invalid_argument(format!("Invalid workload: {}", s))
2828
});
2929
}

policy-controller/k8s/api/src/labels.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ impl Expression {
182182
(Operator::Exists, key, None) => labels.contains_key(key),
183183
(Operator::DoesNotExist, key, None) => !labels.contains_key(key),
184184
(operator, key, values) => {
185-
tracing::warn!(?operator, %key, ?values, "illegal match expression");
185+
tracing::warn!(?operator, %key, ?values, "Illegal match expression");
186186
false
187187
}
188188
}

policy-controller/k8s/index/src/inbound/index.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ impl kubert::index::IndexNamespacedResource<k8s::Pod> for Index {
478478
Ok(None) => {}
479479
Ok(Some(pod)) => pod.reindex_servers(&ns.policy, &self.authentications),
480480
Err(error) => {
481-
tracing::error!(%error, "Illegal pod update");
481+
tracing::warn!(%error, "Illegal pod update");
482482
}
483483
}
484484
}
@@ -523,7 +523,7 @@ impl kubert::index::IndexNamespacedResource<k8s::external_workload::ExternalWork
523523
// Update, so re-index
524524
Ok(Some(workload)) => workload.reindex_servers(&ns.policy, &self.authentications),
525525
Err(error) => {
526-
tracing::error!(%error, "Illegal external workload update");
526+
tracing::warn!(%error, "Illegal external workload update");
527527
}
528528
}
529529
}
@@ -626,7 +626,7 @@ impl kubert::index::IndexNamespacedResource<k8s::policy::ServerAuthorization> fo
626626
Ok(meta) => self.ns_or_default_with_reindex(ns, move |ns| {
627627
ns.policy.update_server_authz(name, meta)
628628
}),
629-
Err(error) => tracing::error!(%error, "Illegal server authorization update"),
629+
Err(error) => tracing::warn!(%error, "Illegal server authorization update"),
630630
}
631631
}
632632

@@ -660,7 +660,7 @@ impl kubert::index::IndexNamespacedResource<k8s::policy::ServerAuthorization> fo
660660
.added
661661
.push((name, saz)),
662662
Err(error) => {
663-
tracing::error!(ns = %namespace, %name, %error, "Illegal server authorization update")
663+
tracing::warn!(ns = %namespace, %name, %error, "Illegal server authorization update")
664664
}
665665
}
666666
}
@@ -746,7 +746,7 @@ impl kubert::index::IndexNamespacedResource<k8s::policy::AuthorizationPolicy> fo
746746
.added
747747
.push((name, spec)),
748748
Err(error) => {
749-
tracing::error!(ns = %namespace, %name, %error, "Illegal server authorization update")
749+
tracing::warn!(ns = %namespace, %name, %error, "Illegal server authorization update")
750750
}
751751
}
752752
}
@@ -985,7 +985,7 @@ impl kubert::index::IndexNamespacedResource<k8s::policy::HttpLocalRateLimitPolic
985985
.added
986986
.push((name, spec)),
987987
Err(error) => {
988-
tracing::error!(ns = %namespace, %name, %error, "Illegal server ratelimit update")
988+
tracing::warn!(ns = %namespace, %name, %error, "Illegal server ratelimit update")
989989
}
990990
}
991991
}

policy-controller/k8s/index/src/inbound/ratelimit_policy.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,15 @@ impl TryFrom<k8s::policy::HttpLocalRateLimitPolicy> for Spec {
8282
let type_ = match condition.type_.as_ref() {
8383
"Accepted" => ConditionType::Accepted,
8484
condition_type => {
85-
tracing::error!(%status.target_ref.name, %condition_type, "Unexpected condition type found in status");
85+
tracing::warn!(%status.target_ref.name, %condition_type, "Unexpected condition type found in status");
8686
return None;
8787
}
8888
};
8989
let status = match condition.status.as_ref() {
9090
"True" => true,
9191
"False" => false,
9292
condition_status => {
93-
tracing::error!(%status.target_ref.name, %type_, %condition_status, "Unexpected condition status found in status");
93+
tracing::warn!(%status.target_ref.name, %type_, %condition_status, "Unexpected condition status found in status");
9494
return None
9595
},
9696
};

policy-controller/k8s/index/src/inbound/routes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,15 @@ impl Status {
137137
let type_ = match condition.type_.as_ref() {
138138
"Accepted" => ConditionType::Accepted,
139139
condition_type => {
140-
tracing::error!(%status.parent_ref.name, %condition_type, "Unexpected condition type found in parent status");
140+
tracing::warn!(%status.parent_ref.name, %condition_type, "Unexpected condition type found in parent status");
141141
return None;
142142
}
143143
};
144144
let status = match condition.status.as_ref() {
145145
"True" => true,
146146
"False" => false,
147147
condition_status => {
148-
tracing::error!(%status.parent_ref.name, %type_, %condition_status, "Unexpected condition status found in parent status");
148+
tracing::warn!(%status.parent_ref.name, %type_, %condition_status, "Unexpected condition status found in parent status");
149149
return None
150150
},
151151
};

policy-controller/k8s/index/src/inbound/workload.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ fn container_http_probe_paths(
103103
match http::Uri::try_from(path) {
104104
Ok(uri) => Some((port, uri.path().to_string())),
105105
Err(error) => {
106-
tracing::warn!(%error, path, "invalid probe path");
106+
tracing::warn!(%error, path, "Invalid probe path");
107107
None
108108
}
109109
}
@@ -161,7 +161,7 @@ impl Settings {
161161
};
162162

163163
let default_policy = default_policy(anns).unwrap_or_else(|error| {
164-
tracing::warn!(%error, "invalid default policy annotation value");
164+
tracing::warn!(%error, "Invalid default policy annotation value");
165165
None
166166
});
167167

policy-controller/k8s/index/src/outbound/index.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -216,21 +216,21 @@ impl kubert::index::IndexNamespacedResource<Service> for Index {
216216
let ns = service.namespace().expect("Service must have a namespace");
217217
tracing::debug!(name, ns, "indexing service");
218218
let accrual = parse_accrual_config(service.annotations())
219-
.map_err(|error| tracing::error!(%error, service=name, namespace=ns, "failed to parse accrual config"))
219+
.map_err(|error| tracing::warn!(%error, service=name, namespace=ns, "Failed to parse accrual config"))
220220
.unwrap_or_default();
221221
let opaque_ports =
222222
ports_annotation(service.annotations(), "config.linkerd.io/opaque-ports")
223223
.unwrap_or_else(|| self.namespaces.cluster_info.default_opaque_ports.clone());
224224

225225
let timeouts = parse_timeouts(service.annotations())
226-
.map_err(|error| tracing::error!(%error, service=name, namespace=ns, "failed to parse timeouts"))
226+
.map_err(|error| tracing::warn!(%error, service=name, namespace=ns, "Failed to parse timeouts"))
227227
.unwrap_or_default();
228228

229229
let http_retry = http::parse_http_retry(service.annotations()).map_err(|error| {
230-
tracing::error!(%error, service=name, namespace=ns, "failed to parse http retry")
230+
tracing::warn!(%error, service=name, namespace=ns, "Failed to parse http retry")
231231
}).unwrap_or_default();
232232
let grpc_retry = grpc::parse_grpc_retry(service.annotations()).map_err(|error| {
233-
tracing::error!(%error, service=name, namespace=ns, "failed to parse grpc retry")
233+
tracing::warn!(%error, service=name, namespace=ns, "Failed to parse grpc retry")
234234
}).unwrap_or_default();
235235

236236
if let Some(cluster_ips) = service
@@ -252,7 +252,7 @@ impl kubert::index::IndexNamespacedResource<Service> for Index {
252252
self.services_by_ip.insert(addr, service_ref);
253253
}
254254
Err(error) => {
255-
tracing::error!(%error, service=name, cluster_ip, "invalid cluster ip");
255+
tracing::warn!(%error, service=name, cluster_ip, "Invalid cluster ip");
256256
}
257257
}
258258
}
@@ -318,7 +318,7 @@ impl kubert::index::IndexNamespacedResource<linkerd_k8s_api::EgressNetwork> for
318318
.expect("EgressNetwork must have a namespace");
319319
tracing::debug!(name, ns, "indexing EgressNetwork");
320320
let accrual = parse_accrual_config(egress_network.annotations())
321-
.map_err(|error| tracing::error!(%error, service=name, namespace=ns, "failed to parse accrual config"))
321+
.map_err(|error| tracing::warn!(%error, service=name, namespace=ns, "Failed to parse accrual config"))
322322
.unwrap_or_default();
323323
let opaque_ports = ports_annotation(
324324
egress_network.annotations(),
@@ -327,14 +327,14 @@ impl kubert::index::IndexNamespacedResource<linkerd_k8s_api::EgressNetwork> for
327327
.unwrap_or_else(|| self.namespaces.cluster_info.default_opaque_ports.clone());
328328

329329
let timeouts = parse_timeouts(egress_network.annotations())
330-
.map_err(|error| tracing::error!(%error, service=name, namespace=ns, "failed to parse timeouts"))
330+
.map_err(|error| tracing::warn!(%error, service=name, namespace=ns, "Failed to parse timeouts"))
331331
.unwrap_or_default();
332332

333333
let http_retry = http::parse_http_retry(egress_network.annotations()).map_err(|error| {
334-
tracing::error!(%error, service=name, namespace=ns, "failed to parse http retry")
334+
tracing::warn!(%error, service=name, namespace=ns, "Failed to parse http retry")
335335
}).unwrap_or_default();
336336
let grpc_retry = grpc::parse_grpc_retry(egress_network.annotations()).map_err(|error| {
337-
tracing::error!(%error, service=name, namespace=ns, "failed to parse grpc retry")
337+
tracing::warn!(%error, service=name, namespace=ns, "Failed to parse grpc retry")
338338
}).unwrap_or_default();
339339

340340
let egress_net_ref = ResourceRef {
@@ -678,7 +678,7 @@ impl Namespace {
678678
) {
679679
Ok(route) => route,
680680
Err(error) => {
681-
tracing::error!(%error, "failed to convert route");
681+
tracing::warn!(%error, "Failed to convert route");
682682
return;
683683
}
684684
};
@@ -773,7 +773,7 @@ impl Namespace {
773773
) {
774774
Ok(route) => route,
775775
Err(error) => {
776-
tracing::error!(%error, "failed to convert route");
776+
tracing::warn!(%error, "Failed to convert route");
777777
return;
778778
}
779779
};
@@ -868,7 +868,7 @@ impl Namespace {
868868
match tls::convert_route(&self.namespace, route.clone(), cluster_info, resource_info) {
869869
Ok(route) => route,
870870
Err(error) => {
871-
tracing::error!(%error, "failed to convert route");
871+
tracing::warn!(%error, "Failed to convert route");
872872
return;
873873
}
874874
};
@@ -964,7 +964,7 @@ impl Namespace {
964964
match tcp::convert_route(&self.namespace, route.clone(), cluster_info, resource_info) {
965965
Ok(route) => route,
966966
Err(error) => {
967-
tracing::error!(%error, "failed to convert route");
967+
tracing::warn!(%error, "Failed to convert route");
968968
return;
969969
}
970970
};

0 commit comments

Comments
 (0)