@@ -1031,6 +1031,33 @@ message InferBatchStatistics
   StatisticDuration compute_output = 4;
 }
 
+//@@
+//@@.. cpp:var:: message MemoryUsage
+//@@
+//@@   Memory usage.
+//@@
+message MemoryUsage
+{
+  //@@  .. cpp:var:: string type
+  //@@
+  //@@     The type of memory; the value can be "CPU", "CPU_PINNED", or "GPU".
+  //@@
+  string type = 1;
+
+  //@@  .. cpp:var:: int64 id
+  //@@
+  //@@     The id of the memory, typically used with "type" to identify
+  //@@     a device that hosts the memory.
+  //@@
+  int64 id = 2;
+
+  //@@  .. cpp:var:: uint64 byte_size
+  //@@
+  //@@     The byte size of the memory.
+  //@@
+  uint64 byte_size = 3;
+}
+
 //@@
 //@@.. cpp:var:: message ModelStatistics
 //@@
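
The MemoryUsage message added above describes per-device memory as a (type, id, byte_size) triple. As a rough, non-normative sketch of how a client could consume it, the Python snippet below sums the reported bytes per device from the statistics response. It assumes the tritonclient gRPC package and a server built with this change; the URL and model name are placeholders.

from collections import defaultdict

import tritonclient.grpc as grpcclient

# Sketch only: the endpoint and model name are placeholders, and the
# memory_usage field is present only on servers built with this change.
client = grpcclient.InferenceServerClient(url="localhost:8001")
stats = client.get_inference_statistics(model_name="my_model")

usage_by_device = defaultdict(int)
for model_stat in stats.model_stats:
    for mem in model_stat.memory_usage:
        # Group by (type, id), e.g. ("GPU", 0), as the proto comments suggest.
        usage_by_device[(mem.type, mem.id)] += mem.byte_size

for (mem_type, mem_id), total in sorted(usage_by_device.items()):
    print(f"{mem_type}[{mem_id}]: {total / (1 << 20):.1f} MiB")
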
@@ -1100,6 +1127,19 @@ message ModelStatistics
   //@@     compute).
   //@@
   repeated InferBatchStatistics batch_stats = 7;
+
+  //@@  .. cpp:var:: MemoryUsage memory_usage (repeated)
+  //@@
+  //@@     The memory usage detected during model loading, which may be used
+  //@@     to estimate the memory to be released once the model is unloaded.
+  //@@     Note that the estimate is inferred from the profiling tools and the
+  //@@     framework's memory schema, so it is advised to run experiments to
+  //@@     understand the scenarios in which the reported memory usage can be
+  //@@     relied on. As a starting point, the GPU memory usage reported for
+  //@@     models in the ONNX Runtime and TensorRT backends usually aligns
+  //@@     with actual usage.
+  //@@
+  repeated MemoryUsage memory_usage = 8;
 }
 
 //@@
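
The memory_usage comment frames these numbers as an estimate of what unloading a model would free. Purely to illustrate that idea (not an API guarantee), the sketch below compares the summed GPU byte_size against a hypothetical requirement; the 2 GiB threshold, URL, and model name are invented for the example, and the result is only as trustworthy as the profiling described above.

import tritonclient.grpc as grpcclient

# Sketch only: decide whether unloading "my_model" is likely to free enough
# GPU memory. The 2 GiB requirement, URL, and model name are placeholders.
client = grpcclient.InferenceServerClient(url="localhost:8001")
stats = client.get_inference_statistics(model_name="my_model")

gpu_bytes = sum(
    mem.byte_size
    for model_stat in stats.model_stats
    for mem in model_stat.memory_usage
    if mem.type == "GPU"
)

required_bytes = 2 * (1 << 30)  # hypothetical memory needed for another model
if gpu_bytes >= required_bytes:
    print(f"Unloading is estimated to free ~{gpu_bytes / (1 << 30):.2f} GiB of GPU memory")
else:
    print("Reported usage suggests unloading would not free enough GPU memory")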