@@ -265,6 +265,43 @@ message GPUOptions {
265265 // system memory size for better resource estimation of multi-tenancy(one
266266 // gpu with multiple model) use case.
267267 int32 gpu_system_memory_size_in_mb = 16 ;
268+
269+ // If true, save the information needed to create a PjRt GPU client with
270+ // remote devices.
271+ bool populate_pjrt_gpu_client_creation_info = 17 ;
272+
273+ // node_id for use when creating a PjRt GPU client with remote devices,
274+ // which enumerates jobs*tasks from a ServerDef.
275+ int32 node_id = 18 ;
276+
277+ // Options controlling whether data transfer streams are merged into the
278+ // compute stream of the same stream group. Stream merging helps reduce the
279+ // overhead caused by stream synchronization, especially when data transfers
280+ // are frequent. For example, setting "merge_host_to_device_stream = true"
281+ // makes the compute stream responsible for both computation and host to
282+ // device memory copy.
283+ message StreamMergeOptions {
284+ // If true, the compute stream is also used for host_to_device copy. It is
285+ // then no longer necessary to record an event before the copy to let the
286+ // copy stream wait for the compute stream to finish. There is also no
287+ // need to wait for the copy to complete before executing the callback
288+ // function.
289+ bool merge_host_to_device_stream = 1 ;
290+
291+ // If true, the compute stream is also used for device_to_host copy. It is
292+ // then no longer necessary to record an event before the copy to let the
293+ // copy stream wait for the compute stream to finish.
294+ bool merge_device_to_host_stream = 2 ;
295+
296+ // If true, the compute stream is also used for device_to_device copy. It
297+ // is then no longer necessary to record an event before the copy to let
298+ // the copy stream wait for the compute stream of the sending device to
299+ // finish. There is also no need to wait for the compute stream of the
300+ // receiving device to finish if the copy is within the same device.
301+ bool merge_device_to_device_stream = 3 ;
302+ }
303+
304+ StreamMergeOptions stream_merge_options = 19 ;
268305 }
269306
270307 // Everything inside experimental is subject to change and is not subject
0 commit comments