@@ -1536,12 +1536,52 @@ EnsembleContext::ScheduleSteps(
15361536
15371537} // namespace
15381538
1539+ Status
1540+ EnsembleScheduler::ValidateConfig (const inference::ModelConfig& config)
1541+ {
1542+ // Validate max_ensemble_inflight_responses parameter if present
1543+ if (config.parameters ().contains (" max_ensemble_inflight_responses" )) {
1544+ const auto & param =
1545+ config.parameters ().at (" max_ensemble_inflight_responses" );
1546+ const std::string& value = param.string_value ();
1547+
1548+ try {
1549+ const int parsed = std::stoi (value);
1550+ if (parsed <= 0 ) {
1551+ return Status (
1552+ Status::Code::INVALID_ARG,
1553+ " Invalid 'max_ensemble_inflight_responses' for ensemble model '" +
1554+ config.name () + " ': value must be positive, got " +
1555+ std::to_string (parsed));
1556+ }
1557+ }
1558+ catch (const std::out_of_range& e) {
1559+ return Status (
1560+ Status::Code::INVALID_ARG,
1561+ " Invalid 'max_ensemble_inflight_responses' for ensemble model '" +
1562+ config.name () + " ': value exceeds maximum allowed (" +
1563+ std::to_string (INT_MAX) + " )" );
1564+ }
1565+ catch (const std::invalid_argument& e) {
1566+ return Status (
1567+ Status::Code::INVALID_ARG,
1568+ " Invalid 'max_ensemble_inflight_responses' for ensemble model '" +
1569+ config.name () + " ': cannot parse value '" + value + " '" );
1570+ }
1571+ }
1572+
1573+ return Status::Success;
1574+ }
1575+
15391576Status
15401577EnsembleScheduler::Create (
15411578 InferenceStatsAggregator* const stats_aggregator,
15421579 InferenceServer* const server, const ModelIdentifier& model_id,
15431580 const inference::ModelConfig& config, std::unique_ptr<Scheduler>* scheduler)
15441581{
1582+ // Validate configuration before constructing scheduler
1583+ RETURN_IF_ERROR (ValidateConfig (config));
1584+
15451585 scheduler->reset (
15461586 new EnsembleScheduler (stats_aggregator, server, model_id, config));
15471587 return Status::Success;
@@ -1696,32 +1736,15 @@ EnsembleScheduler::EnsembleScheduler(
16961736
16971737 // Parse backpressure configuration. Limits concurrent responses from
16981738 // decoupled steps to prevent memory growth.
1739+ // Configuration is already validated in ValidateConfig()
16991740 if (config.parameters ().contains (" max_ensemble_inflight_responses" )) {
17001741 const auto & param =
17011742 config.parameters ().at (" max_ensemble_inflight_responses" );
1702- const std::string& value = param.string_value ();
1703- try {
1704- const int64_t size = std::stoll (value);
1705- if (size > 0 ) {
1706- info_->max_inflight_responses_ = static_cast <size_t >(size);
1707- LOG_INFO << " Ensemble model '" << config.name ()
1708- << " ' configured with max_ensemble_inflight_responses: "
1709- << info_->max_inflight_responses_ ;
1710- } else {
1711- LOG_ERROR
1712- << " Ensemble model '" << config.name ()
1713- << " ': max_ensemble_inflight_responses must be greater than 0. "
1714- << " Received '" << size << " '. Falling back to default value ("
1715- << info_->max_inflight_responses_ << " )." ;
1716- }
1717- }
1718- catch (const std::exception& e) {
1719- LOG_ERROR << " Ensemble model '" << config.name ()
1720- << " ': failed to parse max_ensemble_inflight_responses='"
1721- << value << " ': " << e.what ()
1722- << " . Falling back to default value ("
1723- << info_->max_inflight_responses_ << " )." ;
1724- }
1743+ info_->max_inflight_responses_ =
1744+ static_cast <size_t >(std::stoi (param.string_value ()));
1745+ LOG_INFO << " Ensemble model '" << config.name ()
1746+ << " ' configured with max_ensemble_inflight_responses: "
1747+ << info_->max_inflight_responses_ ;
17251748 }
17261749}
17271750
0 commit comments