We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent da18f28 commit 08365ea — Copy full SHA for 08365ea
protobuf/model_config.proto
@@ -1659,6 +1659,16 @@ message ModelEnsembling
1659
//@@ The models and the input / output mappings used within the ensemble.
1660
//@@
1661
repeated Step step = 1;
1662
+
1663
+ //@@ .. cpp:var:: uint32 max_inflight_responses
1664
+ //@@
1665
+ //@@ The maximum number of concurrent inflight responses from ensemble
1666
+ //@@ steps to downstream consumers. This limit prevents unbounded memory
1667
+ //@@ growth when decoupled models produce responses faster than downstream
1668
+ //@@ models can consume them. Default value is 0, which indicates that no
1669
+ //@@ limit is enforced (unlimited).
1670
1671
+ uint32 max_inflight_responses = 2;
1672
}
1673
1674
0 commit comments