From 2f2c207ae3988d2103ba10eb542ac1788460c906 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Tue, 1 Jul 2025 17:39:00 -0700
Subject: [PATCH 1/2] Add bounds check when loading PbMemory from shared memory

---
 src/pb_memory.cc  | 12 +++++++++++-
 src/shm_manager.h |  5 ++++-
 2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/src/pb_memory.cc b/src/pb_memory.cc
index fa32bb1c..71aa39ba 100644
--- a/src/pb_memory.cc
+++ b/src/pb_memory.cc
@@ -1,4 +1,4 @@
-// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -226,6 +226,11 @@ PbMemory::LoadFromSharedMemory(
   MemoryShm* memory_shm_ptr = reinterpret_cast<MemoryShm*>(data_shm);
   char* memory_data_shm = data_shm + sizeof(MemoryShm);
 
+  if (memory_data_shm + memory_shm_ptr->byte_size >
+      (char*)shm_pool->GetBaseAddress() + shm_pool->GetCurrentCapacity()) {
+    throw PythonBackendException("Attempted to access out of bounds memory.");
+  }
+
   char* data_ptr = nullptr;
   bool opened_cuda_ipc_handle = false;
   if (memory_shm_ptr->memory_type == TRITONSERVER_MEMORY_GPU &&
@@ -275,6 +280,11 @@ PbMemory::LoadFromSharedMemory(
       reinterpret_cast<MemoryShm*>(memory_shm.data_.get());
   char* memory_data_shm = memory_shm.data_.get() + sizeof(MemoryShm);
 
+  if (memory_data_shm + memory_shm_ptr->byte_size >
+      (char*)shm_pool->GetBaseAddress() + shm_pool->GetCurrentCapacity()) {
+    throw PythonBackendException("Attempted to access out of bounds memory.");
+  }
+
   char* data_ptr = nullptr;
   bool opened_cuda_ipc_handle = false;
   if (memory_shm_ptr->memory_type == TRITONSERVER_MEMORY_GPU) {
diff --git a/src/shm_manager.h b/src/shm_manager.h
index 25e04570..e0799a07 100644
--- a/src/shm_manager.h
+++ b/src/shm_manager.h
@@ -1,4 +1,4 @@
-// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -188,6 +188,9 @@ class SharedMemoryManager {
     return cuda_memory_pool_manager_;
   }
 
+  uint64_t GetCurrentCapacity() const { return current_capacity_; }
+  void* GetBaseAddress() const { return managed_buffer_->get_address(); }
+
   ~SharedMemoryManager() noexcept(false);
 
  private:

From 595a48862cf6ecc78045571f1eab8723511aadaa Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Mon, 7 Jul 2025 01:23:48 -0700
Subject: [PATCH 2/2] Only bounds-check CPU shared memory so unit tests pass

---
 src/pb_memory.cc | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/src/pb_memory.cc b/src/pb_memory.cc
index 71aa39ba..5b678f1a 100644
--- a/src/pb_memory.cc
+++ b/src/pb_memory.cc
@@ -26,6 +26,8 @@
 
 #include "pb_memory.h"
 
+#include <sstream>
+
 namespace triton { namespace backend { namespace python {
 
 std::unique_ptr<PbMemory>
@@ -225,12 +227,6 @@ PbMemory::LoadFromSharedMemory(
 {
   MemoryShm* memory_shm_ptr = reinterpret_cast<MemoryShm*>(data_shm);
   char* memory_data_shm = data_shm + sizeof(MemoryShm);
-
-  if (memory_data_shm + memory_shm_ptr->byte_size >
-      (char*)shm_pool->GetBaseAddress() + shm_pool->GetCurrentCapacity()) {
-    throw PythonBackendException("Attempted to access out of bounds memory.");
-  }
-
   char* data_ptr = nullptr;
   bool opened_cuda_ipc_handle = false;
   if (memory_shm_ptr->memory_type == TRITONSERVER_MEMORY_GPU &&
@@ -265,6 +261,19 @@ PbMemory::LoadFromSharedMemory(
   } else {
     data_ptr = memory_data_shm;
   }
+
+  // This check only validates CPU shared memory access.
+  if (memory_shm_ptr->memory_type != TRITONSERVER_MEMORY_GPU &&
+      (data_ptr + memory_shm_ptr->byte_size >
+       (char*)shm_pool->GetBaseAddress() + shm_pool->GetCurrentCapacity())) {
+    std::ostringstream oss;
+    oss << "0x" << std::hex
+        << (reinterpret_cast<uintptr_t>(data_ptr) + memory_shm_ptr->byte_size);
+    throw PythonBackendException(
+        std::string("Attempted to access out of bounds memory address ") +
+        oss.str());
+  }
+
   return std::unique_ptr<PbMemory>(new PbMemory(
       data_shm, data_ptr, handle,
       opened_cuda_ipc_handle /* opened_cuda_ipc_handle */));
@@ -280,11 +289,6 @@ PbMemory::LoadFromSharedMemory(
       reinterpret_cast<MemoryShm*>(memory_shm.data_.get());
   char* memory_data_shm = memory_shm.data_.get() + sizeof(MemoryShm);
 
-  if (memory_data_shm + memory_shm_ptr->byte_size >
-      (char*)shm_pool->GetBaseAddress() + shm_pool->GetCurrentCapacity()) {
-    throw PythonBackendException("Attempted to access out of bounds memory.");
-  }
-
   char* data_ptr = nullptr;
   bool opened_cuda_ipc_handle = false;
   if (memory_shm_ptr->memory_type == TRITONSERVER_MEMORY_GPU) {
@@ -319,6 +323,19 @@ PbMemory::LoadFromSharedMemory(
   } else {
     data_ptr = memory_data_shm;
   }
+
+  // This check only validates CPU shared memory access.
+  if (memory_shm_ptr->memory_type != TRITONSERVER_MEMORY_GPU &&
+      (data_ptr + memory_shm_ptr->byte_size >
+       (char*)shm_pool->GetBaseAddress() + shm_pool->GetCurrentCapacity())) {
+    std::ostringstream oss;
+    oss << "0x" << std::hex
+        << (reinterpret_cast<uintptr_t>(data_ptr) + memory_shm_ptr->byte_size);
+    throw PythonBackendException(
+        std::string("Attempted to access out of bounds memory address ") +
+        oss.str());
+  }
+
   return std::unique_ptr<PbMemory>(new PbMemory(
       memory_shm, data_ptr,
       opened_cuda_ipc_handle /* opened_cuda_ipc_handle */));