@@ -1382,6 +1382,40 @@ message ModelSequenceBatching
13821382 //@@ The optional field to specify the initial state for the model.
13831383 //@@
13841384 repeated InitialState initial_state = 5 ;
1385+
1386+ //@@ .. cpp:var:: bool use_same_buffer_for_input_output
1387+ //@@
1388+ //@@ The optional field to use a single buffer for both input and output
1389+ //@@ state. Without this option, Triton allocates separate buffers
1390+ //@@ for input and output state,
1391+ //@@ which can be problematic if the state size is
1392+ //@@ large. This option reduces the memory usage by allocating a single
1393+ //@@ buffer. Enabling this option is recommended whenever
1394+ //@@ the input state is processed before the output state is written.
1395+ //@@ When enabled, the state
1396+ //@@ will always be updated regardless of whether
1397+ //@@ TRITONBACKEND_StateUpdate is called
1398+ //@@ (however, TRITONBACKEND_StateUpdate should still be called for
1399+ //@@ completeness).
1400+ //@@
1401+ //@@ The default value is false.
1402+ //@@
1403+ bool use_same_buffer_for_input_output = 6 ;
1404+
1405+ //@@ .. cpp:var:: bool use_growable_memory
1406+ //@@
1407+ //@@ The optional field to enable an implicit state buffer to grow
1408+ //@@ without reallocating or copying existing memory.
1409+ //@@ Additional memory will be appended to the end of the buffer and
1410+ //@@ existing data will be preserved.
1411+ //@@ This option is only available for CUDA memory and requires enabling
1412+ //@@ use_same_buffer_for_input_output. When using this option,
1413+ //@@ the StateBuffer call will always return CUDA memory, even if CPU
1414+ //@@ memory is requested.
1415+ //@@
1416+ //@@ The default value is false.
1417+ //@@
1418+ bool use_growable_memory = 7 ;
13851419 }
13861420
13871421 //@@ .. cpp:var:: message StrategyDirect
0 commit comments