Skip to content

Commit a5dc0f9

Browse files
authored
[web] use shorter memory info name for WebGPU buffer and WebNN tensor (#27207)
### Description This PR renames the following existing names for MemoryInfo: - `WebGPU_Buffer` -> `WebGPU_Buf` - `WebNN_Tensor` -> `WebNN_Ten` ### Motivation and Context `OrtMemoryInfo` uses a `std::string` to store the name. Modern C++ compilers use "small string optimization" (SSO) to avoid an extra memory allocation if the string is small enough. While different compilers may have different implementations, the following test program is used to test what the exact limit is for a certain compiler: ```c++ #include <string> #include <cstdio> int main() { std::string webgpu0 = "WebGPU_Buf"; std::string webgpu1 = "WebGPU_Buff"; std::string webgpu2 = "WebGPU_Buffe"; std::string webgpu3 = "WebGPU_Buffer"; printf("=========== %s\n string address: %p\n data address : %p\n\n", webgpu0.c_str(), (void*)&webgpu0, (void*)webgpu0.data()); printf("=========== %s\n string address: %p\n data address : %p\n\n", webgpu1.c_str(), (void*)&webgpu1, (void*)webgpu1.data()); printf("=========== %s\n string address: %p\n data address : %p\n\n", webgpu2.c_str(), (void*)&webgpu2, (void*)webgpu2.data()); printf("=========== %s\n string address: %p\n data address : %p\n\n", webgpu3.c_str(), (void*)&webgpu3, (void*)webgpu3.data()); return 0; } ``` When using Emscripten (targeting wasm32), the runtime result looks like this: ``` =========== WebGPU_Buf string address: 0x10db0 data address : 0x10db0 =========== WebGPU_Buff string address: 0x10da4 data address : 0x10dc8 =========== WebGPU_Buffe string address: 0x10d98 data address : 0x10de0 =========== WebGPU_Buffer string address: 0x10d8c data address : 0x10df8 ``` This shows that the string needs to be no more than 10 bytes (excluding the '\0' at the end) to enable SSO.
1 parent cee825d commit a5dc0f9

File tree

5 files changed

+9
-9
lines changed

5 files changed

+9
-9
lines changed

include/onnxruntime/core/framework/allocator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
8585
constexpr const char* OpenVINO_RT = "OpenVINO_RT";
8686
constexpr const char* OpenVINO_RT_NPU = "OpenVINO_RT_NPU";
8787
constexpr const char* QNN_HTP_SHARED = "QnnHtpShared";
88-
constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";
89-
constexpr const char* WEBNN_TENSOR = "WebNN_Tensor";
88+
constexpr const char* WEBGPU_BUFFER = "WebGPU_Buf"; // limited to 10 chars to ensure std::string SSO for web
89+
constexpr const char* WEBNN_TENSOR = "WebNN_Ten"; // limited to 10 chars to ensure std::string SSO for web
9090

9191
constexpr size_t kAllocAlignment = 256;
9292
constexpr const size_t kAlloc4KAlignment = 4096;

js/node/src/inference_session_wrap.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ Napi::Value InferenceSessionWrap::Run(const Napi::CallbackInfo& info) {
181181
size_t inputIndex = 0;
182182
size_t outputIndex = 0;
183183
Ort::MemoryInfo cpuMemoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
184-
Ort::MemoryInfo gpuBufferMemoryInfo{"WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault};
184+
Ort::MemoryInfo gpuBufferMemoryInfo{"WebGPU_Buf", OrtDeviceAllocator, 0, OrtMemTypeDefault};
185185

186186
try {
187187
for (auto& name : inputNames_) {

js/node/src/tensor_helper.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ Napi::Value OrtValueToNapiValue(Napi::Env env, Ort::Value&& value) {
251251
// location
252252
auto memoryInfo = value.GetTensorMemoryInfo();
253253
bool isGpuBuffer = memoryInfo.GetDeviceType() == OrtMemoryInfoDeviceType_GPU &&
254-
memoryInfo.GetAllocatorName() == "WebGPU_Buffer";
254+
memoryInfo.GetAllocatorName() == "WebGPU_Buf";
255255

256256
// size
257257
auto size = tensorTypeAndShapeInfo.GetElementCount();

onnxruntime/contrib_ops/webgpu/quantization/gather_block_quantized.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ Status GatherBlockQuantized::ComputeInternal(ComputeContext& context) const {
149149
TensorShape data_representation_4bit_shape{x->Shape()};
150150
MLDataType new_dtype = (x_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) ? DataTypeImpl::GetType<UInt4x2>() : DataTypeImpl::GetType<Int4x2>();
151151
auto memory_info = OrtMemoryInfo{
152-
"WebGPU_Buffer",
152+
WEBGPU_BUFFER,
153153
OrtDeviceAllocator,
154154
OrtDevice{OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NONE, 0}};
155155

onnxruntime/wasm/api.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -393,10 +393,10 @@ OrtValue* OrtCreateTensor(int data_type, void* data, size_t data_length, size_t*
393393
OrtMemoryInfo* memory_info = nullptr;
394394
switch (data_location) {
395395
case DATA_LOCATION_GPU_BUFFER:
396-
RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
396+
RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebGPU_Buf", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
397397
break;
398398
case DATA_LOCATION_ML_TENSOR:
399-
RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebNN_Tensor", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
399+
RETURN_NULLPTR_IF_ERROR(CreateMemoryInfo, "WebNN_Ten", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
400400
break;
401401
default:
402402
RETURN_NULLPTR_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info);
@@ -563,9 +563,9 @@ int EMSCRIPTEN_KEEPALIVE OrtBindOutput(OrtIoBinding* io_binding,
563563
if (output_location != DATA_LOCATION_GPU_BUFFER && output_location != DATA_LOCATION_ML_TENSOR) {
564564
RETURN_ERROR_CODE_IF_ERROR(CreateCpuMemoryInfo, OrtDeviceAllocator, OrtMemTypeDefault, &memory_info);
565565
} else if (output_location == DATA_LOCATION_ML_TENSOR) {
566-
RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebNN_Tensor", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
566+
RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebNN_Ten", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
567567
} else {
568-
RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebGPU_Buffer", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
568+
RETURN_ERROR_CODE_IF_ERROR(CreateMemoryInfo, "WebGPU_Buf", OrtDeviceAllocator, 0, OrtMemTypeDefault, &memory_info);
569569
}
570570
REGISTER_AUTO_RELEASE_HANDLE(MemoryInfo, memory_info);
571571
return CHECK_STATUS(BindOutputToDevice, io_binding, name, memory_info);

0 commit comments

Comments
 (0)