Skip to content

Commit f099cfb

Browse files
authored
Qualcomm AI Engine Direct - Fix mem_handle register-twice issue (#13410)
Summary: - Insert the registered handle into the pre_registered_handles_ map to avoid registering the same data_ptr multiple times. Background: When running llama in lookahead mode, the same AR-N model is used for both the prompt processor and the token generator. The input and output are the same, and the KV cache is shared between both components. This causes a "register twice" error message from QNN when a shared buffer (Smart Mask) is used. Error message: ``` [ERROR] [Qnn ExecuTorch]: <E> Memory Handle duplicate found, matches Handle ID 0x2 [ERROR] [Qnn ExecuTorch]: <E> Mem handle exists already [ERROR] [Qnn ExecuTorch]: <E> Failed to register memHandles [ERROR] [Qnn ExecuTorch]: <E> Failed to register memHandles [ERROR] [Qnn ExecuTorch]: <E> Failed to register mem with error 0x1f42 ``` cc: @sxu , @haowhsu-quic
1 parent 204eee9 commit f099cfb

File tree

3 files changed

+11
-3
lines changed

3 files changed

+11
-3
lines changed

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,8 @@ Error QnnManager::RegisterCustomMem(
291291
data_ptr,
292292
unaligned_custom_mem_base,
293293
total_custom_mem_size,
294-
tensor_offset) == Error::Ok,
294+
tensor_offset,
295+
info) == Error::Ok,
295296
Internal,
296297
"Fail to register to shared memory.");
297298

backends/qualcomm/runtime/backends/QnnMemManager.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,16 @@ Error QnnMemManager::RegisterIonMem(
5656
return Error::Ok;
5757
}
5858

59+
// TODO: Find a better way to unify RegisterCustomMem and
60+
// PreRegisterCustomMemHandle
5961
Error QnnMemManager::RegisterCustomMem(
6062
const std::shared_ptr<TensorWrapper>& tensor_wrapper,
6163
int32_t mem_fd,
6264
void* mem_ptr,
6365
void* unaligned_custom_mem_base,
6466
size_t total_custom_mem_size,
65-
size_t tensor_offset) {
67+
size_t tensor_offset,
68+
const CustomMemTensorInfo& info) {
6669
const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
6770
Qnn_MemDescriptor_t descriptor = {
6871
{tensor_wrapper->GetRank(), tensor_wrapper->GetDims(), nullptr},
@@ -94,6 +97,7 @@ Error QnnMemManager::RegisterCustomMem(
9497
return Error::Internal;
9598
}
9699
tensor_wrapper->SetMemHandle(handle);
100+
pre_registered_handles_.insert({info, handle});
97101
registered_map_.insert({handle, mem_ptr});
98102
if (log_level_ >= QnnExecuTorchLogLevel::kLogLevelInfo) {
99103
QNN_EXECUTORCH_LOG_INFO(

backends/qualcomm/runtime/backends/QnnMemManager.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class QnnMemManager {
4141
void* mem_ptr,
4242
void* unaligned_custom_mem_base,
4343
size_t total_custom_mem_size,
44-
size_t tensor_offset);
44+
size_t tensor_offset,
45+
const CustomMemTensorInfo& info);
4546

4647
// Pre-register custom mem handle from SharedBuffer. Bring forward the
4748
// memHandle creating time from execution to initialization.
@@ -67,7 +68,9 @@ class QnnMemManager {
6768
const QnnImplementation& implementation_;
6869
QnnContext* context_;
6970
QnnExecuTorchLogLevel log_level_;
71+
// Store the registered Qnn_MemHandle_t for de-registration
7072
std::unordered_map<Qnn_MemHandle_t, void*> registered_map_;
73+
// Store the pre-registered custom mem handles
7174
std::unordered_map<CustomMemTensorInfo, void*> pre_registered_handles_;
7275
std::unordered_map<executorch::aten::ScalarType, Qnn_DataType_t>
7376
scalar_type_to_qnn_dtype_ = {

0 commit comments

Comments
 (0)