@@ -1457,6 +1457,28 @@ message ModelSequenceBatching
14571457 //@@ wait for additional requests for batching. Default is 0.
14581458 //@@
14591459 uint64 max_queue_delay_microseconds = 3 ;
1460+
1461+ //@@ .. cpp:var:: bool preserve_ordering
1462+ //@@
1463+ //@@ Should the dynamic batcher preserve the ordering of responses to
1464+ //@@ match the order of requests received by the scheduler. Default is
1465+ //@@ false. If true, the responses will be returned in the same order
1466+ //@@ that the requests were sent to the scheduler. If false, the responses
1467+ //@@ may be returned in arbitrary order. This option is specifically
1468+ //@@ needed when a sequence of related inference requests (i.e. inference
1469+ //@@ requests with the same correlation ID) are sent to the dynamic
1470+ //@@ batcher to ensure that the sequence responses are in the correct
1471+ //@@ order.
1472+ //@@
1473+ //@@ When using decoupled models, setting this to true may hold back
1474+ //@@ responses from independent sequences until the previous request
1475+ //@@ completes, hurting overall
1476+ //@@ performance. If using GRPC streaming protocol, the stream ordering
1477+ //@@ guarantee may be sufficient alone to ensure the responses for each
1478+ //@@ sequence are returned in sequence-order without blocking based on
1479+ //@@ independent requests, depending on the use case.
1480+ //@@
1481+ bool preserve_ordering = 4 ;
14601482 }
14611483
14621484 //@@ .. cpp:var:: oneof strategy_choice
0 commit comments