@@ -57,7 +57,7 @@ OrtStatus* TensorrtExecutionProviderFactory::CreateMemoryInfoForDevices(int num_
                                                /*device_id*/ device_id, OrtDeviceMemoryType_DEFAULT,
                                                /*alignment*/ 0, OrtAllocatorType::OrtDeviceAllocator, &mem_info));

-    cuda_gpu_memory_infos.emplace_back(MemoryInfoUniquePtr(mem_info, ort_api.ReleaseMemoryInfo));
+    cuda_gpu_memory_infos[device_id] = MemoryInfoUniquePtr(mem_info, ort_api.ReleaseMemoryInfo);

     // HOST_ACCESSIBLE memory should use the non-CPU device type
     mem_info = nullptr;
@@ -66,7 +66,7 @@ OrtStatus* TensorrtExecutionProviderFactory::CreateMemoryInfoForDevices(int num_
                                                /*device_id*/ device_id, OrtDeviceMemoryType_HOST_ACCESSIBLE,
                                                /*alignment*/ 0, OrtAllocatorType::OrtDeviceAllocator, &mem_info));

-    cuda_pinned_memory_infos.emplace_back(MemoryInfoUniquePtr(mem_info, ort_api.ReleaseMemoryInfo));
+    cuda_pinned_memory_infos[device_id] = MemoryInfoUniquePtr(mem_info, ort_api.ReleaseMemoryInfo);
   }

   return nullptr;
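The switch from `emplace_back` to keyed assignment implies the backing containers changed from vectors to per-device maps. A minimal sketch of what the factory members might now look like; these declarations are assumed, since the diff only shows the use sites:

```cpp
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include "onnxruntime_c_api.h"

// Assumed alias: pairs an OrtMemoryInfo* with ort_api.ReleaseMemoryInfo as its deleter,
// matching the MemoryInfoUniquePtr(mem_info, ort_api.ReleaseMemoryInfo) construction above.
using MemoryInfoUniquePtr = std::unique_ptr<OrtMemoryInfo, std::function<void(OrtMemoryInfo*)>>;

// Per-device lookup keyed by device_id, replacing the previous std::vector members.
std::map<uint32_t, MemoryInfoUniquePtr> cuda_gpu_memory_infos;     // OrtDeviceMemoryType_DEFAULT
std::map<uint32_t, MemoryInfoUniquePtr> cuda_pinned_memory_infos;  // OrtDeviceMemoryType_HOST_ACCESSIBLE
```

Indexing by `device_id` also makes the setup idempotent: running it twice for the same device replaces the entry instead of appending a duplicate.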
@@ -196,35 +196,50 @@ void ORT_API_CALL TensorrtExecutionProviderFactory::ReleaseEpImpl(OrtEpFactory*
   delete trt_ep;
 }

-OrtStatus* ORT_API_CALL TensorrtExecutionProviderFactory::CreateAllocatorImpl(
-    OrtEpFactory* this_ptr, const OrtMemoryInfo* memory_info,
-    const OrtKeyValuePairs* /*allocator_options*/,
-    OrtAllocator** allocator) noexcept {
+OrtStatus* ORT_API_CALL TensorrtExecutionProviderFactory::CreateAllocatorImpl(OrtEpFactory* this_ptr,
+                                                                              const OrtMemoryInfo* memory_info,
+                                                                              const OrtKeyValuePairs* /*allocator_options*/,
+                                                                              OrtAllocator** allocator) noexcept {
   auto& factory = *static_cast<TensorrtExecutionProviderFactory*>(this_ptr);
-  *allocator = nullptr;

-  // NOTE: The factory implementation can return a shared OrtAllocator* instead of creating a new instance on each call.
-  //       To do this just make ReleaseAllocatorImpl a no-op.
+  // NOTE: The factory implementation is free to return a shared OrtAllocator* instance instead of creating a new
+  //       allocator on each call. To do this, keep an allocator instance as an OrtEpFactory class member and make
+  //       ReleaseAllocatorImpl a no-op.

-  // NOTE: If OrtMemoryInfo has allocator type (call MemoryInfoGetType) of OrtArenaAllocator, an ORT BFCArena
-  //       will be added to wrap the returned OrtAllocator. The EP is free to implement its own arena, and if it
-  //       wants to do this the OrtMemoryInfo MUST be created with an allocator type of OrtDeviceAllocator.
-
-  // NOTE: The OrtMemoryInfo pointer should only ever be coming straight from an OrtEpDevice, and pointer based
-  //       matching should work.
+  // NOTE: The EP should implement its own arena logic. ep_arena.cc/h is provided as a reference, and we use it here
+  //       for device memory. `allocator_options` can be used for arena configuration; there is a helper in
+  //       ep_arena.h to convert from OrtKeyValuePairs to the same arena config settings that ORT uses.
+  //       You are of course free to use completely different settings.

   const OrtMemoryDevice* mem_device = factory.ep_api.MemoryInfo_GetMemoryDevice(memory_info);
   uint32_t device_id = factory.ep_api.MemoryDevice_GetDeviceId(mem_device);

   if (factory.ep_api.MemoryDevice_GetMemoryType(mem_device) == OrtDeviceMemoryType_DEFAULT) {
+    // Reuse the allocator that was previously created for this device.
+    if (factory.cuda_gpu_allocators.find(device_id) != factory.cuda_gpu_allocators.end()) {
+      *allocator = factory.cuda_gpu_allocators[device_id].get();
+      return nullptr;
+    }
+
     // create a CUDA allocator
     auto cuda_allocator = std::make_unique<CUDAAllocator>(memory_info, static_cast<DeviceId>(device_id));
-    factory.device_id_to_cuda_gpu_memory_info_map[device_id] = memory_info;
-    *allocator = cuda_allocator.release();
+
+    *allocator = cuda_allocator.get();
+    factory.cuda_gpu_allocators[device_id] = std::move(cuda_allocator);
+
   } else if (factory.ep_api.MemoryDevice_GetMemoryType(mem_device) == OrtDeviceMemoryType_HOST_ACCESSIBLE) {
+    // Reuse the allocator that was previously created for this device.
+    if (factory.cuda_pinned_allocators.find(device_id) != factory.cuda_pinned_allocators.end()) {
+      *allocator = factory.cuda_pinned_allocators[device_id].get();
+      return nullptr;
+    }
+
     // create a CUDA PINNED allocator
     auto cuda_pinned_allocator = std::make_unique<CUDAPinnedAllocator>(memory_info);
-    *allocator = cuda_pinned_allocator.release();
+
+    *allocator = cuda_pinned_allocator.get();
+    factory.cuda_pinned_allocators[device_id] = std::move(cuda_pinned_allocator);
+
   } else {
     return factory.ort_api.CreateStatus(ORT_INVALID_ARGUMENT,
                                         "INTERNAL ERROR! Unknown memory info provided to CreateAllocator. "
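The lookup-before-create pattern above needs two cache maps on the factory. Their declarations are not part of this diff; a minimal sketch of what they might look like, assuming `CUDAAllocator` and `CUDAPinnedAllocator` are the EP's allocator classes deriving from `OrtAllocator`:

```cpp
#include <cstdint>
#include <map>
#include <memory>

// Assumed members inside TensorrtExecutionProviderFactory, backing the caching in
// CreateAllocatorImpl. The factory keeps ownership via unique_ptr and hands out raw
// pointers, which is what allows ReleaseAllocatorImpl (below) to be a no-op.
std::map<uint32_t, std::unique_ptr<CUDAAllocator>> cuda_gpu_allocators;
std::map<uint32_t, std::unique_ptr<CUDAPinnedAllocator>> cuda_pinned_allocators;
```

One consequence of this design: the allocators now live as long as the factory, so they can be shared across sessions instead of being destroyed with each one.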
@@ -236,7 +251,8 @@ OrtStatus* ORT_API_CALL TensorrtExecutionProviderFactory::CreateAllocatorImpl(

 void ORT_API_CALL TensorrtExecutionProviderFactory::ReleaseAllocatorImpl(OrtEpFactory* /*this*/,
                                                                          OrtAllocator* allocator) noexcept {
-  delete static_cast<CUDAAllocator*>(allocator);
+  // No-op: the allocators are owned by the factory and shared across sessions.
+  // delete static_cast<CUDAAllocator*>(allocator);
 }

 OrtStatus* ORT_API_CALL TensorrtExecutionProviderFactory::CreateDataTransferImpl(
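With the cache in place, a second CreateAllocator call for the same OrtMemoryInfo should return the very same instance, and releasing it is always safe. A hypothetical check sketching that contract; the `factory`/`memory_info` setup and the OrtEpFactory function-pointer names are assumed to mirror the Impl functions above, and status checks are omitted for brevity:

```cpp
#include <cassert>

// Illustrative only: factory (OrtEpFactory*) and memory_info (const OrtMemoryInfo*)
// are assumed to come from the usual OrtEpDevice plumbing.
void CheckAllocatorIsShared(OrtEpFactory* factory, const OrtMemoryInfo* memory_info) {
  OrtAllocator* a1 = nullptr;
  OrtAllocator* a2 = nullptr;
  factory->CreateAllocator(factory, memory_info, /*allocator_options*/ nullptr, &a1);
  factory->CreateAllocator(factory, memory_info, /*allocator_options*/ nullptr, &a2);
  assert(a1 == a2);                        // second call returns the cached allocator
  factory->ReleaseAllocator(factory, a1);  // no-op: ownership stays with the factory
}
```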