Enhance QNNContextProc: validate shared-memory size before inference and propagate ModelInference failures

quic-zhanweiw · quic-zhanweiw · commit 68f00b7b899c · 2026-01-18T10:32:21.000+08:00
diff --git a/pybind/AppBuilder.h b/pybind/AppBuilder.h
@@ -351,7 +351,11 @@ std::vector<py::array> inference_P(std::string model_name, std::string proc_name
         }
     }
 
-    g_LibAppBuilder.ModelInference(model_name, proc_name, share_memory_name, inputBuffers, inputSize, outputBuffers, outputSize, perf_profile, graphIndex);
+    bool success = g_LibAppBuilder.ModelInference(model_name, proc_name, share_memory_name, inputBuffers, inputSize, outputBuffers, outputSize, perf_profile, graphIndex);
+    if (!success) {
+        QNN_ERR("ModelInference failed for model: %s, proc: %s", model_name.c_str(), proc_name.c_str());
+        return {};
+    }
 
     //QNN_INF("inference_P::inference output vector length: %d\n", outputBuffers.size());
 
diff --git a/script/qai_appbuilder/qnncontext.py b/script/qai_appbuilder/qnncontext.py
@@ -269,6 +269,7 @@ def __init__(self,
         self.proc_name = proc_name
         self.input_data_type = input_data_type
         self.output_data_type = output_data_type
+        self.model_name = model_name
 
         if self.proc_name == "None":
             raise ValueError("proc_name must be specified!")
@@ -282,6 +283,11 @@ def __init__(self,
 
     #@timer
     def Inference(self, shareMemory, input, perf_profile=PerfProfile.DEFAULT, graphIndex=0):
+        total_input_bytes = sum(arr.nbytes for arr in input)
+        if total_input_bytes > shareMemory.share_memory_size:
+            raise ValueError(f"Input data size {total_input_bytes} exceeds share memory size {shareMemory.share_memory_size}, you need to create a larger share memory for model {self.model_name} @ process {self.proc_name}.")
+            # print(f"Input data size {total_input_bytes} exceeds share memory size {shareMemory.share_memory_size}, you need to create a larger share memory for model {self.model_name} @ process {self.proc_name}.")
+
         return self._inference_and_reshape(
             input,
             lambda _in: self.m_context.Inference(shareMemory.m_memory, _in, perf_profile, graphIndex,
diff --git a/src/QnnSampleApp.cpp b/src/QnnSampleApp.cpp
@@ -1588,12 +1588,18 @@ sample_app::StatusCode sample_app::QnnSampleApp::executeGraphsBuffers(std::vecto
       }
 
       // Compute the span end offset (relative to shared memory base) for this input buffer.
-      ptrdiff_t delta = inputBuffers[inputIdx] - pShareBuffer;
-      if (delta < 0) {
+      const uintptr_t baseAddr = reinterpret_cast<uintptr_t>(pShareBuffer);
+      const uintptr_t currAddr = reinterpret_cast<uintptr_t>(inputBuffers[inputIdx]);
+      if (currAddr < baseAddr) {
         QNN_ERROR("Invalid shared buffer layout: inputIdx=%zu is below shared base pointer", inputIdx);
         return StatusCode::FAILURE;
       }
-      const size_t offsetBytes = static_cast<size_t>(delta);
+      const uintptr_t deltaAddr = currAddr - baseAddr;
+      if (deltaAddr > (std::numeric_limits<size_t>::max)()) {
+        QNN_ERROR("share memory size overflow while converting input offset");
+        return StatusCode::FAILURE;
+      }
+      const size_t offsetBytes = static_cast<size_t>(deltaAddr);
       if (offsetBytes > (std::numeric_limits<size_t>::max)() - bytesNeeded) {
         QNN_ERROR("share memory size overflow while accumulating required input bytes");
         return StatusCode::FAILURE;
diff --git a/src/SVC/Utils/Utils.hpp b/src/SVC/Utils/Utils.hpp
@@ -333,6 +333,67 @@ BOOL TalkToSvc_Inference(std::string model_name, std::string proc_name, std::str
         return false;
     }
 
+
+    // Early validation to avoid VectorToShareMem memcpy crash.
+    if (inputBuffers.size() != inputSize.size()) {
+        QNN_ERR("TalkToSvc_Inference: inputBuffers/inputSize length mismatch. buffers=%zu size=%zu\n", inputBuffers.size(), inputSize.size());
+        return false;
+    }
+    if (!pShareMemInfo->lpBase || pShareMemInfo->size == 0) {
+        QNN_ERR("TalkToSvc_Inference: invalid share memory base or size. name=%s lpBase=%p size=%llu\n", share_memory_name.c_str(), pShareMemInfo->lpBase, (unsigned long long)pShareMemInfo->size);
+        return false;
+    }
+
+    // Compute the required size following VectorToShareMem's offset strategy: bytes reserved for inputs already inside the shared region plus bytes to copy in from inputs outside it.
+    {
+        uint8_t* base = (uint8_t*)pShareMemInfo->lpBase;
+        uint8_t* end = base + pShareMemInfo->size;
+        size_t reserved = 0;
+        size_t toCopy = 0;
+
+        for (size_t i = 0; i < inputBuffers.size(); ++i) {
+            uint8_t* buf = inputBuffers[i];
+            size_t sz = inputSize[i];
+
+            if (!buf && sz > 0) {
+                QNN_ERR("TalkToSvc_Inference: null input buffer at index %zu with non-zero size %llu\n", i, (unsigned long long)sz);
+                return false;
+            }
+
+            // In-share: [base, end)
+            if (buf >= base && buf < end) {
+                if (sz > 0 && ((size_t)(end - buf) < sz)) {
+                    QNN_ERR("TalkToSvc_Inference: in-share input buffer out of bounds. idx=%zu buf=%p size=%llu share=[%p,%p)\n", i, buf, (unsigned long long)sz, base, end);
+                    return false;
+                }
+                if (std::numeric_limits<size_t>::max() - reserved < sz) {
+                    QNN_ERR("TalkToSvc_Inference: size_t overflow while accumulating reserved. idx=%zu\n", i);
+                    return false;
+                }
+                reserved += sz;
+            } else {
+                if (std::numeric_limits<size_t>::max() - toCopy < sz) {
+                    QNN_ERR("TalkToSvc_Inference: size_t overflow while accumulating toCopy. idx=%zu\n", i);
+                    return false;
+                }
+                toCopy += sz;
+            }
+        }
+
+        if (std::numeric_limits<size_t>::max() - reserved < toCopy) {
+            QNN_ERR("TalkToSvc_Inference: size_t overflow while computing totalNeeded.\n");
+            return false;
+        }
+        
+        size_t totalNeeded = reserved + toCopy;
+        if (totalNeeded > pShareMemInfo->size) {
+            QNN_ERR("TalkToSvc_Inference: share memory too small. required=%llu (reserved=%llu copy=%llu) share_size=%llu name=%s\n",
+                    (unsigned long long)totalNeeded, (unsigned long long)reserved, (unsigned long long)toCopy, (unsigned long long)pShareMemInfo->size, share_memory_name.c_str());
+            return false;
+        }
+    }
+
+
     HANDLE hSvcPipeInWrite = pProcInfo->hSvcPipeInWrite;
     HANDLE hSvcPipeOutRead = pProcInfo->hSvcPipeOutRead;
     DWORD dwRead = 0, dwWrite = 0;

Original file line number	Diff line number	Diff line change
`@@ -351,7 +351,11 @@ std::vector<py::array> inference_P(std::string model_name, std::string proc_name`
`351`	`351`	`}`
`352`	`352`	`}`
`353`	`353`
`354`		`- g_LibAppBuilder.ModelInference(model_name, proc_name, share_memory_name, inputBuffers, inputSize, outputBuffers, outputSize, perf_profile, graphIndex);`
	`354`	`+ bool success = g_LibAppBuilder.ModelInference(model_name, proc_name, share_memory_name, inputBuffers, inputSize, outputBuffers, outputSize, perf_profile, graphIndex);`
	`355`	`+ if (!success) {`
	`356`	`+ QNN_ERR("ModelInference failed for model: %s, proc: %s", model_name.c_str(), proc_name.c_str());`
	`357`	`+ return {};`
	`358`	`+ }`
`355`	`359`
`356`	`360`	`//QNN_INF("inference_P::inference output vector length: %d\n", outputBuffers.size());`
`357`	`361`