
Commit bc560b3

Minor nit
Parent: 3d0b621

2 files changed: 11 additions, 9 deletions


backends/cuda/runtime/cuda_backend.cpp

Lines changed: 8 additions & 7 deletions
@@ -64,9 +64,9 @@ class ET_EXPERIMENTAL CudaBackend final
 // ============================================================================
 //
 // This backend supports storing GPU tensors between execute() calls to enable
-// device-to-device (D2D) copies instead of slower host-to-device (H2D) copies.
-// This is useful for encoder-decoder models where the encoder output is reused
-// across many decoder iterations.
+// device-to-device (D2D) copies instead of slower host-to-device (H2D)
+// copies. This is useful for encoder-decoder models where the encoder output
+// is reused across many decoder iterations.
 //
 // SUPPORTED OPTIONS (via set_option):
 //
@@ -75,7 +75,8 @@ class ET_EXPERIMENTAL CudaBackend final
 // Only supports single-output methods.
 // Example: opts.set_option("store_output", "encoder_output");
 //
-// "use_stored_input" (string): For inputs matching the stored tensor's size,
+// "use_stored_input" (string): For inputs matching the stored tensor's
+// size,
 // use D2D copy from the stored tensor instead of H2D copy from CPU.
 // This setting persists across execute() calls until reset.
 // Example: opts.set_option("use_stored_input", "encoder_output");
@@ -401,7 +402,7 @@ class ET_EXPERIMENTAL CudaBackend final

 // Process input tensors: ExecuTorch provides CPU tensors, create GPU
 // copies. For stored inputs, use GPU-to-GPU copy instead of CPU-to-GPU.
-for (int i = 0; i < n_inputs; i++) {
+for (size_t i = 0; i < n_inputs; i++) {
 // Get tensor dimensions and properties from ExecuTorch CPU tensor
 auto cpu_tensor = &(args[i]->toTensor());
 auto sizes = cpu_tensor->sizes();
@@ -478,7 +479,7 @@ class ET_EXPERIMENTAL CudaBackend final
 }
 // Process output tensors: create GPU counterparts for ExecuTorch CPU
 // tensors
-for (int i = 0; i < n_outputs; i++) {
+for (size_t i = 0; i < n_outputs; i++) {
 // Get output tensor dimensions from ExecuTorch CPU tensor
 auto cpu_output_tensor = &(args[i + n_inputs]->toTensor());
 auto sizes = cpu_output_tensor->sizes();
@@ -557,7 +558,7 @@ class ET_EXPERIMENTAL CudaBackend final
 }

 // Copy GPU output results back to CPU output tensors
-for (int i = 0; i < n_outputs; i++) {
+for (size_t i = 0; i < n_outputs; i++) {
 auto cpu_output_tensor = &(args[i + n_inputs]->toTensor());
 // For DYNAMIC_BOUND tensors we try to resize
 ET_CHECK_OK_OR_RETURN_ERROR(
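
For context, the comment block above documents a two-step workflow: "store_output" keeps a method's single output resident on the GPU, and "use_stored_input" feeds size-matching inputs from that stored tensor via a D2D copy. A minimal caller-side sketch follows. BackendOptions, set_option, and the option keys appear in this diff; the include path, the BackendOptions capacity, and the commented-out apply_backend_options() step are assumptions for illustration only, not part of this commit.

// Usage sketch, not from this commit. Assumptions: the include path and the
// hypothetical apply_backend_options() step; BackendOptions, set_option, and
// the keys "store_output" / "use_stored_input" come from the diff above.
#include <executorch/runtime/backend/options.h> // assumed header path

using executorch::runtime::BackendOptions;

void enable_encoder_output_reuse() {
  // Before running the encoder: ask the CUDA backend to keep its single
  // output on the GPU under the key "encoder_output".
  BackendOptions<1> store_opts;
  store_opts.set_option("store_output", "encoder_output");
  // apply_backend_options("CudaBackend", store_opts); // hypothetical apply step

  // Before the decoder loop: inputs whose size matches the stored tensor are
  // filled with a D2D copy from it instead of an H2D copy from CPU. This
  // persists across execute() calls until "reset_stored_input" is set.
  BackendOptions<1> use_opts;
  use_opts.set_option("use_stored_input", "encoder_output");
  // apply_backend_options("CudaBackend", use_opts); // hypothetical apply step
}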

extension/asr/runner/runner.cpp

Lines changed: 3 additions & 2 deletions
@@ -333,8 +333,9 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
 }
 }

-// Reset stored input settings and free GPU memory after decoder loop completes.
-// This disables the D2D copy optimization and releases the stored encoder output.
+// Reset stored input settings and free GPU memory after decoder loop
+// completes. This disables the D2D copy optimization and releases the stored
+// encoder output.
 {
 ::executorch::runtime::BackendOptions<2> opts;
 opts.set_option("reset_stored_input", true);
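
Read together with the backend-side options above, the lifecycle these comments suggest is: set "store_output" before the encoder runs, set "use_stored_input" before the decoder loop, then set "reset_stored_input" here once transcription completes so the stored encoder output is released rather than staying resident on the GPU.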
