Skip to content

Commit 9494a9d

Browse files
authored
cluster: removing exceptions (envoyproxy#37500)
Risk Level: low Testing: updated tests Docs Changes: n/a Release Notes: n/a envoyproxy/envoy-mobile#176 Signed-off-by: Alyssa Wilk <[email protected]>
1 parent e456b9b commit 9494a9d

33 files changed

+337
-207
lines changed

envoy/upstream/cluster_manager.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -274,11 +274,12 @@ class ClusterManager {
274274
* update. It can be overridden by setting `remove_ignored` to true while
275275
* calling removeCluster(). This is useful for clusters whose lifecycle
276276
* is managed with custom implementation, e.g., DFP clusters.
277-
* @return true if the action results in an add/update of a cluster.
277+
* @return true if the action results in an add/update of a cluster, an error
278+
* status if the config is invalid.
278279
*/
279-
virtual bool addOrUpdateCluster(const envoy::config::cluster::v3::Cluster& cluster,
280-
const std::string& version_info,
281-
const bool avoid_cds_removal = false) PURE;
280+
virtual absl::StatusOr<bool>
281+
addOrUpdateCluster(const envoy::config::cluster::v3::Cluster& cluster,
282+
const std::string& version_info, const bool avoid_cds_removal = false) PURE;
282283

283284
/**
284285
* Set a callback that will be invoked when all primary clusters have been initialized.
@@ -572,7 +573,7 @@ class ClusterManagerFactory {
572573
* The cluster manager initialize() method needs to be called right after this method.
573574
* Please check https://github.com/envoyproxy/envoy/issues/33218 for details.
574575
*/
575-
virtual ClusterManagerPtr
576+
virtual absl::StatusOr<ClusterManagerPtr>
576577
clusterManagerFromProto(const envoy::config::bootstrap::v3::Bootstrap& bootstrap) PURE;
577578

578579
/**

envoy/upstream/upstream.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1307,7 +1307,7 @@ class Cluster {
13071307
* time initialization. E.g., for a dynamic DNS cluster the initialize callback will be
13081308
* called when initial DNS resolution is complete.
13091309
*/
1310-
virtual void initialize(std::function<void()> callback) PURE;
1310+
virtual void initialize(std::function<absl::Status()> callback) PURE;
13111311

13121312
/**
13131313
* @return the phase in which the cluster is initialized at boot. This mechanism is used such that

source/common/upstream/cds_api_helper.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@ CdsApiHelper::onConfigUpdate(const std::vector<Config::DecodedResourceRef>& adde
4646
fmt::format("{}: duplicate cluster {} found", cluster.name(), cluster.name()));
4747
continue;
4848
}
49-
if (cm_.addOrUpdateCluster(cluster, resource.get().version())) {
49+
auto update_or_error = cm_.addOrUpdateCluster(cluster, resource.get().version());
50+
if (!update_or_error.status().ok()) {
51+
exception_msgs.push_back(
52+
fmt::format("{}: {}", cluster.name(), update_or_error.status().message()));
53+
continue;
54+
}
55+
if (*update_or_error) {
5056
any_applied = true;
5157
ENVOY_LOG(debug, "{}: add/update cluster '{}'", name_, cluster.name());
5258
++added_or_updated;

source/common/upstream/cluster_manager_impl.cc

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,9 @@ void ClusterManagerInitHelper::addCluster(ClusterManagerCluster& cm_cluster) {
117117
ASSERT(state_ != State::AllClustersInitialized);
118118

119119
const auto initialize_cb = [&cm_cluster, this] {
120-
onClusterInit(cm_cluster);
120+
RETURN_IF_NOT_OK(onClusterInit(cm_cluster));
121121
cm_cluster.cluster().info()->configUpdateStats().warming_state_.set(0);
122+
return absl::OkStatus();
122123
};
123124
Cluster& cluster = cm_cluster.cluster();
124125

@@ -142,10 +143,11 @@ void ClusterManagerInitHelper::addCluster(ClusterManagerCluster& cm_cluster) {
142143
primary_init_clusters_.size(), secondary_init_clusters_.size());
143144
}
144145

145-
void ClusterManagerInitHelper::onClusterInit(ClusterManagerCluster& cluster) {
146+
absl::Status ClusterManagerInitHelper::onClusterInit(ClusterManagerCluster& cluster) {
146147
ASSERT(state_ != State::AllClustersInitialized);
147-
per_cluster_init_callback_(cluster);
148+
RETURN_IF_NOT_OK(per_cluster_init_callback_(cluster));
148149
removeCluster(cluster);
150+
return absl::OkStatus();
149151
}
150152

151153
void ClusterManagerInitHelper::removeCluster(ClusterManagerCluster& cluster) {
@@ -186,7 +188,7 @@ void ClusterManagerInitHelper::initializeSecondaryClusters() {
186188
ClusterManagerCluster* cluster = iter->second;
187189
ENVOY_LOG(debug, "initializing secondary cluster {}", iter->first);
188190
++iter;
189-
cluster->cluster().initialize([cluster, this] { onClusterInit(*cluster); });
191+
cluster->cluster().initialize([cluster, this] { return onClusterInit(*cluster); });
190192
}
191193
}
192194

@@ -309,15 +311,16 @@ ClusterManagerImpl::ClusterManagerImpl(
309311
AccessLog::AccessLogManager& log_manager, Event::Dispatcher& main_thread_dispatcher,
310312
OptRef<Server::Admin> admin, ProtobufMessage::ValidationContext& validation_context,
311313
Api::Api& api, Http::Context& http_context, Grpc::Context& grpc_context,
312-
Router::Context& router_context, Server::Instance& server)
314+
Router::Context& router_context, Server::Instance& server, absl::Status& creation_status)
313315
: server_(server), factory_(factory), runtime_(runtime), stats_(stats), tls_(tls),
314316
random_(api.randomGenerator()),
315317
deferred_cluster_creation_(bootstrap.cluster_manager().enable_deferred_cluster_creation()),
316318
bind_config_(bootstrap.cluster_manager().has_upstream_bind_config()
317319
? absl::make_optional(bootstrap.cluster_manager().upstream_bind_config())
318320
: absl::nullopt),
319321
local_info_(local_info), cm_stats_(generateStats(*stats.rootScope())),
320-
init_helper_(*this, [this](ClusterManagerCluster& cluster) { onClusterInit(cluster); }),
322+
init_helper_(*this,
323+
[this](ClusterManagerCluster& cluster) { return onClusterInit(cluster); }),
321324
time_source_(main_thread_dispatcher.timeSource()), dispatcher_(main_thread_dispatcher),
322325
http_context_(http_context), validation_context_(validation_context),
323326
router_context_(router_context), cluster_stat_names_(stats.symbolTable()),
@@ -345,9 +348,10 @@ ClusterManagerImpl::ClusterManagerImpl(
345348
if (cm_config.has_outlier_detection()) {
346349
const std::string event_log_file_path = cm_config.outlier_detection().event_log_path();
347350
if (!event_log_file_path.empty()) {
348-
outlier_event_logger_ = THROW_OR_RETURN_VALUE(
349-
Outlier::EventLoggerImpl::create(log_manager, event_log_file_path, time_source_),
350-
std::unique_ptr<Outlier::EventLoggerImpl>);
351+
auto outlier_or_error =
352+
Outlier::EventLoggerImpl::create(log_manager, event_log_file_path, time_source_);
353+
SET_AND_RETURN_IF_NOT_OK(outlier_or_error.status(), creation_status);
354+
outlier_event_logger_ = std::move(*outlier_or_error);
351355
}
352356
}
353357

@@ -693,7 +697,7 @@ ClusterManagerImpl::ThreadLocalClusterManagerImpl::generateStats(Stats::Scope& s
693697
return {ALL_THREAD_LOCAL_CLUSTER_MANAGER_STATS(POOL_GAUGE_PREFIX(scope, final_prefix))};
694698
}
695699

696-
void ClusterManagerImpl::onClusterInit(ClusterManagerCluster& cm_cluster) {
700+
absl::Status ClusterManagerImpl::onClusterInit(ClusterManagerCluster& cm_cluster) {
697701
// This routine is called when a cluster has finished initializing. The cluster has not yet
698702
// been setup for cross-thread updates to avoid needless updates during initialization. The order
699703
// of operations here is important. We start by initializing the thread aware load balancer if
@@ -710,7 +714,7 @@ void ClusterManagerImpl::onClusterInit(ClusterManagerCluster& cm_cluster) {
710714
cluster_data = active_clusters_.find(cluster.info()->name());
711715

712716
if (cluster_data->second->thread_aware_lb_ != nullptr) {
713-
THROW_IF_NOT_OK(cluster_data->second->thread_aware_lb_->initialize());
717+
RETURN_IF_NOT_OK(cluster_data->second->thread_aware_lb_->initialize());
714718
}
715719

716720
// Now setup for cross-thread updates.
@@ -793,6 +797,7 @@ void ClusterManagerImpl::onClusterInit(ClusterManagerCluster& cm_cluster) {
793797
// clusters being added/updated. We could gate the below update on hosts being available on
794798
// the cluster or the cluster not already existing, but the special logic is not worth it.
795799
postThreadLocalClusterUpdate(cm_cluster, std::move(params));
800+
return absl::OkStatus();
796801
}
797802

798803
bool ClusterManagerImpl::scheduleUpdate(ClusterManagerCluster& cluster, uint32_t priority,
@@ -870,9 +875,10 @@ void ClusterManagerImpl::applyUpdates(ClusterManagerCluster& cluster, uint32_t p
870875
updates.last_updated_ = time_source_.monotonicTime();
871876
}
872877

873-
bool ClusterManagerImpl::addOrUpdateCluster(const envoy::config::cluster::v3::Cluster& cluster,
874-
const std::string& version_info,
875-
const bool avoid_cds_removal) {
878+
absl::StatusOr<bool>
879+
ClusterManagerImpl::addOrUpdateCluster(const envoy::config::cluster::v3::Cluster& cluster,
880+
const std::string& version_info,
881+
const bool avoid_cds_removal) {
876882
// First we need to see if this new config is new or an update to an existing dynamic cluster.
877883
// We don't allow updates to statically configured clusters in the main configuration. We check
878884
// both the warming clusters and the active clusters to see if we need an update or the update
@@ -924,7 +930,7 @@ bool ClusterManagerImpl::addOrUpdateCluster(const envoy::config::cluster::v3::Cl
924930
auto status_or_cluster =
925931
loadCluster(cluster, new_hash, version_info, /*added_via_api=*/true,
926932
/*required_for_ads=*/false, warming_clusters_, avoid_cds_removal);
927-
THROW_IF_NOT_OK_REF(status_or_cluster.status());
933+
RETURN_IF_NOT_OK_REF(status_or_cluster.status());
928934
const ClusterDataPtr previous_cluster = std::move(status_or_cluster.value());
929935
auto& cluster_entry = warming_clusters_.at(cluster_name);
930936
cluster_entry->cluster_->info()->configUpdateStats().warming_state_.set(1);
@@ -938,7 +944,7 @@ bool ClusterManagerImpl::addOrUpdateCluster(const envoy::config::cluster::v3::Cl
938944
auto state_changed_cluster_entry = warming_clusters_.find(cluster_name);
939945
state_changed_cluster_entry->second->cluster_->info()->configUpdateStats().warming_state_.set(
940946
0);
941-
onClusterInit(*state_changed_cluster_entry->second);
947+
return onClusterInit(*state_changed_cluster_entry->second);
942948
});
943949
}
944950

@@ -2296,13 +2302,15 @@ void ClusterManagerImpl::ThreadLocalClusterManagerImpl::tcpConnPoolIsIdle(
22962302
}
22972303
}
22982304

2299-
ClusterManagerPtr ProdClusterManagerFactory::clusterManagerFromProto(
2305+
absl::StatusOr<ClusterManagerPtr> ProdClusterManagerFactory::clusterManagerFromProto(
23002306
const envoy::config::bootstrap::v3::Bootstrap& bootstrap) {
2307+
absl::Status creation_status = absl::OkStatus();
23012308
auto cluster_manager_impl = std::unique_ptr<ClusterManagerImpl>{new ClusterManagerImpl(
23022309
bootstrap, *this, context_, stats_, tls_, context_.runtime(), context_.localInfo(),
23032310
context_.accessLogManager(), context_.mainThreadDispatcher(), context_.admin(),
23042311
context_.messageValidationContext(), context_.api(), http_context_, context_.grpcContext(),
2305-
context_.routerContext(), server_)};
2312+
context_.routerContext(), server_, creation_status)};
2313+
RETURN_IF_NOT_OK(creation_status);
23062314
return cluster_manager_impl;
23072315
}
23082316

source/common/upstream/cluster_manager_impl.h

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class ProdClusterManagerFactory : public ClusterManagerFactory {
6565
server_(server) {}
6666

6767
// Upstream::ClusterManagerFactory
68-
ClusterManagerPtr
68+
absl::StatusOr<ClusterManagerPtr>
6969
clusterManagerFromProto(const envoy::config::bootstrap::v3::Bootstrap& bootstrap) override;
7070
Http::ConnectionPool::InstancePtr allocateConnPool(
7171
Event::Dispatcher& dispatcher, HostConstSharedPtr host, ResourcePriority priority,
@@ -147,7 +147,7 @@ class ClusterManagerInitHelper : Logger::Loggable<Logger::Id::upstream> {
147147
*/
148148
ClusterManagerInitHelper(
149149
ClusterManager& cm,
150-
const std::function<void(ClusterManagerCluster&)>& per_cluster_init_callback)
150+
const std::function<absl::Status(ClusterManagerCluster&)>& per_cluster_init_callback)
151151
: cm_(cm), per_cluster_init_callback_(per_cluster_init_callback) {}
152152

153153
enum class State {
@@ -190,10 +190,10 @@ class ClusterManagerInitHelper : Logger::Loggable<Logger::Id::upstream> {
190190

191191
void initializeSecondaryClusters();
192192
void maybeFinishInitialize();
193-
void onClusterInit(ClusterManagerCluster& cluster);
193+
absl::Status onClusterInit(ClusterManagerCluster& cluster);
194194

195195
ClusterManager& cm_;
196-
std::function<void(ClusterManagerCluster& cluster)> per_cluster_init_callback_;
196+
std::function<absl::Status(ClusterManagerCluster& cluster)> per_cluster_init_callback_;
197197
CdsApi* cds_{};
198198
ClusterManager::PrimaryClustersReadyCallback primary_clusters_initialized_callback_;
199199
ClusterManager::InitializationCompleteCallback initialized_callback_;
@@ -251,9 +251,9 @@ class ClusterManagerImpl : public ClusterManager,
251251
std::size_t warmingClusterCount() const { return warming_clusters_.size(); }
252252

253253
// Upstream::ClusterManager
254-
bool addOrUpdateCluster(const envoy::config::cluster::v3::Cluster& cluster,
255-
const std::string& version_info,
256-
const bool avoid_cds_removal = false) override;
254+
absl::StatusOr<bool> addOrUpdateCluster(const envoy::config::cluster::v3::Cluster& cluster,
255+
const std::string& version_info,
256+
const bool avoid_cds_removal = false) override;
257257

258258
void setPrimaryClustersInitializedCb(PrimaryClustersReadyCallback callback) override {
259259
init_helper_.setPrimaryClustersInitializedCb(callback);
@@ -381,16 +381,14 @@ class ClusterManagerImpl : public ClusterManager,
381381
protected:
382382
// ClusterManagerImpl's constructor should not be invoked directly; create instances from the
383383
// clusterManagerFromProto() static method. The init() method must be called after construction.
384-
ClusterManagerImpl(const envoy::config::bootstrap::v3::Bootstrap& bootstrap,
385-
ClusterManagerFactory& factory,
386-
Server::Configuration::CommonFactoryContext& context, Stats::Store& stats,
387-
ThreadLocal::Instance& tls, Runtime::Loader& runtime,
388-
const LocalInfo::LocalInfo& local_info,
389-
AccessLog::AccessLogManager& log_manager,
390-
Event::Dispatcher& main_thread_dispatcher, OptRef<Server::Admin> admin,
391-
ProtobufMessage::ValidationContext& validation_context, Api::Api& api,
392-
Http::Context& http_context, Grpc::Context& grpc_context,
393-
Router::Context& router_context, Server::Instance& server);
384+
ClusterManagerImpl(
385+
const envoy::config::bootstrap::v3::Bootstrap& bootstrap, ClusterManagerFactory& factory,
386+
Server::Configuration::CommonFactoryContext& context, Stats::Store& stats,
387+
ThreadLocal::Instance& tls, Runtime::Loader& runtime, const LocalInfo::LocalInfo& local_info,
388+
AccessLog::AccessLogManager& log_manager, Event::Dispatcher& main_thread_dispatcher,
389+
OptRef<Server::Admin> admin, ProtobufMessage::ValidationContext& validation_context,
390+
Api::Api& api, Http::Context& http_context, Grpc::Context& grpc_context,
391+
Router::Context& router_context, Server::Instance& server, absl::Status& creation_status);
394392

395393
virtual void postThreadLocalRemoveHosts(const Cluster& cluster, const HostVector& hosts_removed);
396394

@@ -872,7 +870,7 @@ class ClusterManagerImpl : public ClusterManager,
872870
const std::string& version_info, bool added_via_api,
873871
bool required_for_ads, ClusterMap& cluster_map,
874872
bool avoid_cds_removal = false);
875-
void onClusterInit(ClusterManagerCluster& cluster);
873+
absl::Status onClusterInit(ClusterManagerCluster& cluster);
876874
void postThreadLocalHealthFailure(const HostSharedPtr& host);
877875
void updateClusterCounts();
878876
void clusterWarmingToActive(const std::string& cluster_name);

source/common/upstream/health_discovery_service.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ HdsDelegate::createHdsCluster(const envoy::config::cluster::v3::Cluster& cluster
211211
store_stats_, ssl_context_manager_, false, *info_factory_, tls_);
212212

213213
// Begin HCs in the background.
214-
new_cluster->initialize([] {});
214+
new_cluster->initialize([] { return absl::OkStatus(); });
215215
new_cluster->initHealthchecks();
216216

217217
return new_cluster;
@@ -547,7 +547,7 @@ void HdsCluster::initHealthchecks() {
547547
}
548548
}
549549

550-
void HdsCluster::initialize(std::function<void()> callback) {
550+
void HdsCluster::initialize(std::function<absl::Status()> callback) {
551551
initialization_complete_callback_ = callback;
552552

553553
// If this function gets called again we do not want to touch the priority set again with the

source/common/upstream/health_discovery_service.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class HdsCluster : public Cluster, Logger::Loggable<Logger::Id::upstream> {
5959
ClusterInfoConstSharedPtr info() const override { return info_; }
6060
Outlier::Detector* outlierDetector() override { return outlier_detector_.get(); }
6161
const Outlier::Detector* outlierDetector() const override { return outlier_detector_.get(); }
62-
void initialize(std::function<void()> callback) override;
62+
void initialize(std::function<absl::Status()> callback) override;
6363
// Compare changes in the cluster proto, and update parts of the cluster as needed.
6464
absl::Status update(envoy::config::cluster::v3::Cluster cluster,
6565
const envoy::config::core::v3::BindConfig& bind_config,
@@ -80,7 +80,7 @@ class HdsCluster : public Cluster, Logger::Loggable<Logger::Id::upstream> {
8080
Outlier::DetectorSharedPtr outlier_detector_;
8181

8282
private:
83-
std::function<void()> initialization_complete_callback_;
83+
std::function<absl::Status()> initialization_complete_callback_;
8484

8585
Server::Configuration::ServerFactoryContext& server_context_;
8686
envoy::config::cluster::v3::Cluster cluster_;

source/common/upstream/upstream_impl.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1736,7 +1736,7 @@ ResourceManager& ClusterInfoImpl::resourceManager(ResourcePriority priority) con
17361736
return *resource_managers_.managers_[enumToInt(priority)];
17371737
}
17381738

1739-
void ClusterImplBase::initialize(std::function<void()> callback) {
1739+
void ClusterImplBase::initialize(std::function<absl::Status()> callback) {
17401740
ASSERT(!initialization_started_);
17411741
ASSERT(initialization_complete_callback_ == nullptr);
17421742
initialization_complete_callback_ = callback;
@@ -1798,7 +1798,7 @@ void ClusterImplBase::finishInitialization() {
17981798
}
17991799

18001800
if (snapped_callback != nullptr) {
1801-
snapped_callback();
1801+
THROW_IF_NOT_OK(snapped_callback());
18021802
}
18031803
}
18041804

source/common/upstream/upstream_impl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,7 +1232,7 @@ class ClusterImplBase : public Cluster, protected Logger::Loggable<Logger::Id::u
12321232
ClusterInfoConstSharedPtr info() const override { return info_; }
12331233
Outlier::Detector* outlierDetector() override { return outlier_detector_.get(); }
12341234
const Outlier::Detector* outlierDetector() const override { return outlier_detector_.get(); }
1235-
void initialize(std::function<void()> callback) override;
1235+
void initialize(std::function<absl::Status()> callback) override;
12361236
UnitFloat dropOverload() const override { return drop_overload_; }
12371237
const std::string& dropCategory() const override { return drop_category_; }
12381238
void setDropOverload(UnitFloat drop_overload) override { drop_overload_ = drop_overload; }
@@ -1301,7 +1301,7 @@ class ClusterImplBase : public Cluster, protected Logger::Loggable<Logger::Id::u
13011301
void reloadHealthyHosts(const HostSharedPtr& host);
13021302

13031303
bool initialization_started_{};
1304-
std::function<void()> initialization_complete_callback_;
1304+
std::function<absl::Status()> initialization_complete_callback_;
13051305
uint64_t pending_initialize_health_checks_{};
13061306
const bool local_cluster_;
13071307
Config::ConstMetadataSharedPoolSharedPtr const_metadata_shared_pool_;

source/extensions/common/aws/credentials_provider_impl.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ void MetadataCredentialsProviderBase::createCluster(bool new_timer) {
191191
cluster_type_str, cluster_name_, host_port);
192192
}
193193

194-
context_->clusterManager().addOrUpdateCluster(cluster, "");
194+
THROW_IF_NOT_OK(context_->clusterManager().addOrUpdateCluster(cluster, "").status());
195195
}
196196

197197
// A thread local callback that occurs on every worker thread during cluster initialization.

0 commit comments

Comments
 (0)