Skip to content

Commit 2e23679

Browse files
committed
Minor nits
1 parent bc560b3 commit 2e23679

File tree

2 files changed

+53
-36
lines changed

2 files changed

+53
-36
lines changed

backends/cuda/runtime/cuda_backend.cpp

Lines changed: 52 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ class ET_EXPERIMENTAL CudaBackend final
185185
&option.value)) {
186186
store_output_name_ = std::string(arr->data());
187187
} else {
188-
ET_LOG(Warning, "store_output option expects a string value");
188+
ET_LOG(Error, "store_output option expects a string value");
189189
return Error::InvalidArgument;
190190
}
191191
}
@@ -196,7 +196,7 @@ class ET_EXPERIMENTAL CudaBackend final
196196
&option.value)) {
197197
use_stored_input_name_ = std::string(arr->data());
198198
} else {
199-
ET_LOG(Warning, "use_stored_input option expects a string value");
199+
ET_LOG(Error, "use_stored_input option expects a string value");
200200
return Error::InvalidArgument;
201201
}
202202
}
@@ -209,7 +209,7 @@ class ET_EXPERIMENTAL CudaBackend final
209209
use_stored_input_name_.clear();
210210
}
211211
} else {
212-
ET_LOG(Warning, "reset_stored_input option expects a boolean value");
212+
ET_LOG(Error, "reset_stored_input option expects a boolean value");
213213
return Error::InvalidArgument;
214214
}
215215
}
@@ -228,7 +228,7 @@ class ET_EXPERIMENTAL CudaBackend final
228228
gpu_tensors_.erase(it);
229229
}
230230
} else {
231-
ET_LOG(Warning, "clear_stored_tensor option expects a string value");
231+
ET_LOG(Error, "clear_stored_tensor option expects a string value");
232232
return Error::InvalidArgument;
233233
}
234234
}
@@ -400,6 +400,10 @@ class ET_EXPERIMENTAL CudaBackend final
400400
};
401401
TensorCleanup cleanup{gpu_inputs, gpu_outputs, gpu_tensors_};
402402

403+
// Track which input index was matched for D2D copy (for duplicate
404+
// detection)
405+
ssize_t matched_input_idx = -1;
406+
403407
// Process input tensors: ExecuTorch provides CPU tensors, create GPU
404408
// copies. For stored inputs, use GPU-to-GPU copy instead of CPU-to-GPU.
405409
for (size_t i = 0; i < n_inputs; i++) {
@@ -424,15 +428,12 @@ class ET_EXPERIMENTAL CudaBackend final
424428
ET_CHECK_OR_RETURN_ERROR(
425429
create_err == Error::Ok,
426430
Internal,
427-
"Failed to create GPU tensor for input %d",
431+
"Failed to create GPU tensor for input %zu",
428432
i);
429433

430434
gpu_inputs[i] = gpu_input_handle;
431435

432436
// Check if this input matches a stored GPU tensor (by size).
433-
// Note: Size-based matching assumes only one input will match. If
434-
// multiple inputs have the same byte size as the stored tensor, the first
435-
// match wins.
436437
if (!use_stored_input_name_.empty()) {
437438
auto it = gpu_tensors_.find(use_stored_input_name_);
438439
if (it != gpu_tensors_.end()) {
@@ -443,29 +444,45 @@ class ET_EXPERIMENTAL CudaBackend final
443444

444445
// Match by size: use stored tensor if sizes match
445446
if (copy_bytes == ref.size_bytes) {
446-
ET_LOG(
447-
Debug,
448-
"Using stored tensor '%s' for input %d (%zu bytes, D2D copy)",
449-
use_stored_input_name_.c_str(),
450-
i,
451-
copy_bytes);
452-
453-
// GPU-to-GPU copy: fast DMA transfer, normalizes tensor format
454-
cudaError_t cuda_err = cudaMemcpy(
455-
gpu_inputs[i]->data_ptr(),
456-
ref.data_ptr,
457-
copy_bytes,
458-
cudaMemcpyDeviceToDevice);
459-
460-
ET_CHECK_OR_RETURN_ERROR(
461-
cuda_err == cudaSuccess,
462-
Internal,
463-
"Failed GPU-to-GPU copy for input %d: %s",
464-
i,
465-
cudaGetErrorString(cuda_err));
466-
467-
// Skip the CPU-to-GPU copy below
468-
continue;
447+
if (matched_input_idx >= 0) {
448+
// Another input already matched - log the ambiguity as an error
449+
ET_LOG(
450+
Error,
451+
"Multiple inputs match stored tensor '%s' size (%zu bytes): "
452+
"input %zd was used, input %zu also matches. "
453+
"Consider using unique tensor sizes or a different matching strategy.",
454+
use_stored_input_name_.c_str(),
455+
copy_bytes,
456+
matched_input_idx,
457+
i);
458+
} else {
459+
// First match - perform D2D copy
460+
matched_input_idx = static_cast<ssize_t>(i);
461+
462+
ET_LOG(
463+
Debug,
464+
"Using stored tensor '%s' for input %zu (%zu bytes, D2D copy)",
465+
use_stored_input_name_.c_str(),
466+
i,
467+
copy_bytes);
468+
469+
// GPU-to-GPU copy: fast DMA transfer, normalizes tensor format
470+
cudaError_t cuda_err = cudaMemcpy(
471+
gpu_inputs[i]->data_ptr(),
472+
ref.data_ptr,
473+
copy_bytes,
474+
cudaMemcpyDeviceToDevice);
475+
476+
ET_CHECK_OR_RETURN_ERROR(
477+
cuda_err == cudaSuccess,
478+
Internal,
479+
"Failed GPU-to-GPU copy for input %zu: %s",
480+
i,
481+
cudaGetErrorString(cuda_err));
482+
483+
// Skip the CPU-to-GPU copy below
484+
continue;
485+
}
469486
}
470487
}
471488
}
@@ -474,7 +491,7 @@ class ET_EXPERIMENTAL CudaBackend final
474491
ET_CHECK_OR_RETURN_ERROR(
475492
aoti_torch_copy_(gpu_inputs[i], cpu_tensor, 0) == Error::Ok,
476493
Internal,
477-
"Failed to copy input %d from CPU to GPU",
494+
"Failed to copy input %zu from CPU to GPU",
478495
i);
479496
}
480497
// Process output tensors: create GPU counterparts for ExecuTorch CPU
@@ -501,7 +518,7 @@ class ET_EXPERIMENTAL CudaBackend final
501518
ET_CHECK_OR_RETURN_ERROR(
502519
create_err == Error::Ok,
503520
Internal,
504-
"Failed to create GPU tensor for output %d",
521+
"Failed to create GPU tensor for output %zu",
505522
i);
506523

507524
gpu_outputs[i] = gpu_output_handle;
@@ -563,11 +580,11 @@ class ET_EXPERIMENTAL CudaBackend final
563580
// For DYNAMIC_BOUND tensors we try to resize
564581
ET_CHECK_OK_OR_RETURN_ERROR(
565582
resize_tensor(*cpu_output_tensor, gpu_outputs[i]->sizes()),
566-
"Error resizing tensor at output index %d",
583+
"Error resizing tensor at output index %zu",
567584
i);
568585
ET_CHECK_OK_OR_RETURN_ERROR(
569586
aoti_torch_copy_(cpu_output_tensor, gpu_outputs[i], 0),
570-
"Failed to copy GPU output %d back to CPU",
587+
"Failed to copy GPU output %zu back to CPU",
571588
i);
572589
}
573590

extension/asr/runner/runner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
342342
opts.set_option("clear_stored_tensor", "encoder_output");
343343
auto err = ::executorch::runtime::set_option("CudaBackend", opts.view());
344344
if (err != ::executorch::runtime::Error::Ok) {
345-
ET_LOG(Warning, "Failed to reset stored input settings");
345+
ET_LOG(Error, "Failed to reset stored input settings");
346346
}
347347
}
348348

0 commit comments

Comments
 (0)