[web] use shorter memory info name for WebGPU buffer and WebNN tensor (#27207)

fs-eire · web-flow · commit a5dc0f95d859 · 2026-02-01T20:31:02.000-08:00
### Description

This PR renames the following existing names for MemoryInfo:

- `WebGPU_Buffer` -> `WebGPU_Buf`
- `WebNN_Tensor` -> `WebNN_Ten`

### Motivation and Context

The `OrtMemoryInfo` uses a `std::string` to store the name. Modern C++
compilers use "small string optimization" (SSO) to avoid an extra
memory allocation if the string is small enough.

While different compilers may have different implementations, the
following test program can be used to determine the exact limit for a
given compiler:

```c++
#include <string>
#include <cstdio>

int main() {
  std::string webgpu0 = "WebGPU_Buf";
  std::string webgpu1 = "WebGPU_Buff";
  std::string webgpu2 = "WebGPU_Buffe";
  std::string webgpu3 = "WebGPU_Buffer";

  printf("=========== %s\n string address: %p\n data address  : %p\n\n", webgpu0.c_str(), (void*)&webgpu0, (void*)webgpu0.data());
  printf("=========== %s\n string address: %p\n data address  : %p\n\n", webgpu1.c_str(), (void*)&webgpu1, (void*)webgpu1.data());
  printf("=========== %s\n string address: %p\n data address  : %p\n\n", webgpu2.c_str(), (void*)&webgpu2, (void*)webgpu2.data());
  printf("=========== %s\n string address: %p\n data address  : %p\n\n", webgpu3.c_str(), (void*)&webgpu3, (void*)webgpu3.data());

  return 0;
}
```

When using emscripten (targeting wasm32), the runtime result looks like
this:
```
=========== WebGPU_Buf
 string address: 0x10db0
 data address  : 0x10db0

=========== WebGPU_Buff
 string address: 0x10da4
 data address  : 0x10dc8

=========== WebGPU_Buffe
 string address: 0x10d98
 data address  : 0x10de0

=========== WebGPU_Buffer
 string address: 0x10d8c
 data address  : 0x10df8
```

This shows that the string needs to be no more than 10 bytes (excluding
the '\0' at the end) to enable SSO.
diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h
@@ -85,8 +85,8 @@ constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
 constexpr const char* OpenVINO_RT = "OpenVINO_RT";
 constexpr const char* OpenVINO_RT_NPU = "OpenVINO_RT_NPU";
 constexpr const char* QNN_HTP_SHARED = "QnnHtpShared";
-constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";
-constexpr const char* WEBNN_TENSOR = "WebNN_Tensor";
+constexpr const char* WEBGPU_BUFFER = "WebGPU_Buf";  // limited to 10 chars to ensure std::string SSO for web
+constexpr const char* WEBNN_TENSOR = "WebNN_Ten";    // limited to 10 chars to ensure std::string SSO for web
 
 constexpr size_t kAllocAlignment = 256;
 constexpr const size_t kAlloc4KAlignment = 4096;
diff --git a/js/node/src/inference_session_wrap.cc b/js/node/src/inference_session_wrap.cc
@@ -181,7 +181,7 @@ Napi::Value InferenceSessionWrap::Run(const Napi::CallbackInfo& info) {
   size_t inputIndex = 0;
   size_t outputIndex = 0;
   Ort::MemoryInfo cpuMemoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
-  Ort::MemoryInfo gpuBufferMemoryInfo{"WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault};
+  Ort::MemoryInfo gpuBufferMemoryInfo{"WebGPU_Buf", OrtDeviceAllocator, 0, OrtMemTypeDefault};
 
   try {
     for (auto& name : inputNames_) {
diff --git a/js/node/src/tensor_helper.cc b/js/node/src/tensor_helper.cc
@@ -251,7 +251,7 @@ Napi::Value OrtValueToNapiValue(Napi::Env env, Ort::Value&& value) {
   // location
   auto memoryInfo = value.GetTensorMemoryInfo();
   bool isGpuBuffer = memoryInfo.GetDeviceType() == OrtMemoryInfoDeviceType_GPU &&
-                     memoryInfo.GetAllocatorName() == "WebGPU_Buffer";
+                     memoryInfo.GetAllocatorName() == "WebGPU_Buf";
 
   // size
   auto size = tensorTypeAndShapeInfo.GetElementCount();
diff --git a/onnxruntime/contrib_ops/webgpu/quantization/gather_block_quantized.cc b/onnxruntime/contrib_ops/webgpu/quantization/gather_block_quantized.cc
@@ -149,7 +149,7 @@ Status GatherBlockQuantized::ComputeInternal(ComputeContext& context) const {
     TensorShape data_representation_4bit_shape{x->Shape()};
     MLDataType new_dtype = (x_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) ? DataTypeImpl::GetType<UInt4x2>() : DataTypeImpl::GetType<Int4x2>();
     auto memory_info = OrtMemoryInfo{
-        "WebGPU_Buffer",
+        WEBGPU_BUFFER,
         OrtDeviceAllocator,
         OrtDevice{OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NONE, 0}};
 
diff --git a/onnxruntime/wasm/api.cc b/onnxruntime/wasm/api.cc
@@ -393,10 +393,10 @@ OrtValue* OrtCreateTensor(int data_type, void* data, size_t data_length, size_t*
     OrtMemoryInfo* memory_info = nullptr;
     switch (data_location) {
       case DATA_LOCATION_GPU_BUFFER:
-        RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
+        RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebGPU_Buf", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
         break;
       case DATA_LOCATION_ML_TENSOR:
-        RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebNN_Tensor", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
+        RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebNN_Ten", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
         break;
       default:
         RETURN_NULLPTR_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info);
@@ -563,9 +563,9 @@ int EMSCRIPTEN_KEEPALIVE OrtBindOutput(OrtIoBinding* io_binding,
     if (output_location != DATA_LOCATION_GPU_BUFFER && output_location != DATA_LOCATION_ML_TENSOR) {
       RETURN_ERROR_CODE_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info);
     } else if (output_location == DATA_LOCATION_ML_TENSOR) {
-      RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebNN_Tensor", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
+      RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebNN_Ten", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
     } else {
-      RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
+      RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebGPU_Buf", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
     }
     REGISTER_AUTO_RELEASE_HANDLE(MemoryInfo, memory_info);
     return CHECK_STATUS(BindOutputToDevice, io_binding, name, memory_info);