@@ -943,7 +943,7 @@ message InferStatistics
   //@@ .. cpp:var:: StatisticDuration queue
   //@@
   //@@ The count and cumulative duration that inference requests wait in
-  //@@ scheduling or other queues. The "queue" count and cumulative
+  //@@ scheduling or other queues. The "queue" count and cumulative
   //@@ duration includes cache hits.
   //@@
   StatisticDuration queue = 3;
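A brief sketch of how these counters are typically consumed: a `StatisticDuration` pairs a request `count` with a cumulative nanosecond total `ns`, so the mean queue latency is the ratio of the two. The snippet below assumes the Python gRPC client (`tritonclient.grpc`), a server on `localhost:8001`, and a placeholder model name.

```python
# Sketch: mean queue latency per model, assuming tritonclient[grpc] is
# installed and a Triton server is listening on localhost:8001.
# "my_model" is a placeholder name, not from this diff.
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="localhost:8001")
stats = client.get_inference_statistics(model_name="my_model")

for model_stats in stats.model_stats:
    queue = model_stats.inference_stats.queue  # StatisticDuration: count + ns
    if queue.count > 0:
        # Cumulative nanoseconds divided by request count, reported in ms.
        mean_ms = queue.ns / queue.count / 1e6
        print(f"{model_stats.name}: mean queue time {mean_ms:.3f} ms "
              f"over {queue.count} requests")
```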
@@ -985,29 +985,32 @@ message InferStatistics
   //@@ and extract output tensor data from the Response Cache on a cache
   //@@ hit. For example, this duration should include the time to copy
   //@@ output tensor data from the Response Cache to the response object.
-  //@@ On cache hits, triton does not need to go to the model/backend
+  //@@ On cache hits, triton does not need to go to the model/backend
   //@@ for the output tensor data, so the "compute_input", "compute_infer",
   //@@ and "compute_output" fields are not updated. Assuming the response
   //@@ cache is enabled for a given model, a cache hit occurs for a
   //@@ request to that model when the request metadata (model name,
   //@@ model version, model inputs) hashes to an existing entry in the
   //@@ cache. On a cache miss, the request hash and response output tensor
   //@@ data is added to the cache. See response cache docs for more info:
-  //@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
+  //@@
+  //@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
   //@@
   StatisticDuration cache_hit = 7;

   //@@ .. cpp:var:: StatisticDuration cache_miss
   //@@
   //@@ The count of response cache misses and cumulative duration to lookup
-  //@@ and insert output tensor data from the computed response to the cache.
+  //@@ and insert output tensor data from the computed response to the
+  //@@ cache.
   //@@ For example, this duration should include the time to copy
   //@@ output tensor data from the response object to the Response Cache.
   //@@ Assuming the response cache is enabled for a given model, a cache
   //@@ miss occurs for a request to that model when the request metadata
   //@@ does NOT hash to an existing entry in the cache. See the response
   //@@ cache docs for more info:
-  //@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
+  //@@
+  //@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
   //@@
   StatisticDuration cache_miss = 8;
}
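Because cache hits skip the compute stages entirely, the hit and miss counters are what you combine to judge cache effectiveness. A minimal sketch under the same assumptions as the previous snippet (Python gRPC client, placeholder model name):

```python
# Sketch: cache effectiveness from the cache_hit / cache_miss counters.
# Assumes tritonclient[grpc], a server on localhost:8001, and a placeholder
# model name, as in the previous snippet.
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="localhost:8001")
stats = client.get_inference_statistics(model_name="my_model")

for model_stats in stats.model_stats:
    infer = model_stats.inference_stats
    hits, misses = infer.cache_hit.count, infer.cache_miss.count
    if hits + misses == 0:
        continue  # cache disabled for this model, or no traffic yet
    hit_rate = hits / (hits + misses)
    # On a hit the compute_* durations are not updated, so cache_hit.ns is
    # the full cost contributed by the hit path.
    mean_hit_ms = infer.cache_hit.ns / hits / 1e6 if hits else 0.0
    print(f"{model_stats.name}: hit rate {hit_rate:.1%}, "
          f"mean hit latency {mean_hit_ms:.3f} ms")
```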
@@ -1056,7 +1059,7 @@ message InferBatchStatistics
 //@@
 //@@ Memory usage.
 //@@
-message MemoryUsage
+message MemoryUsage
 {
   //@@ .. cpp:var:: string type
   //@@
@@ -1149,7 +1152,7 @@ message ModelStatistics
   repeated InferBatchStatistics batch_stats = 7;

   //@@ .. cpp:var:: MemoryUsage memory_usage (repeated)
-  //@@
+  //@@
   //@@ The memory usage detected during model loading, which may be used to
   //@@ estimate the memory to be released once the model is unloaded. Note
   //@@ that the estimation is inferenced by the profiling tools and
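Since `memory_usage` is repeated, estimating what an unload would free means summing entries per device. A sketch under stated assumptions: the `id` and `byte_size` field names are taken from the full `MemoryUsage` message, which this diff truncates after `string type`.

```python
# Sketch: rough per-device estimate of memory freed by unloading a model,
# summed from the repeated ModelStatistics.memory_usage field. The "id" and
# "byte_size" field names are assumptions; only "type" appears in this diff.
from collections import defaultdict

import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="localhost:8001")
stats = client.get_inference_statistics(model_name="my_model")

totals = defaultdict(int)
for model_stats in stats.model_stats:
    for mem in model_stats.memory_usage:
        totals[(mem.type, mem.id)] += mem.byte_size

for (mem_type, device_id), total in sorted(totals.items()):
    print(f"{mem_type}[{device_id}]: ~{total / 2**20:.1f} MiB")
```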