Skip to content

Commit 3d0b621

Browse files
committed
Add clear_stored_tensor option
1 parent 9e1a3cc commit 3d0b621

File tree

2 files changed

+65
-6
lines changed

2 files changed

+65
-6
lines changed

backends/cuda/runtime/cuda_backend.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,47 @@ struct GpuTensorRef {
5959
class ET_EXPERIMENTAL CudaBackend final
6060
: public ::executorch::runtime::BackendInterface {
6161
private:
62+
// ============================================================================
63+
// GPU Tensor Storage for D2D Copy Optimization
64+
// ============================================================================
65+
//
66+
// This backend supports storing GPU tensors between execute() calls to enable
67+
// device-to-device (D2D) copies instead of slower host-to-device (H2D) copies.
68+
// This is useful for encoder-decoder models where the encoder output is reused
69+
// across many decoder iterations.
70+
//
71+
// SUPPORTED OPTIONS (via set_option):
72+
//
73+
// "store_output" (string): Store the output tensor under this name after
74+
// the next execute() call. The tensor remains on GPU until cleared.
75+
// Only supports single-output methods.
76+
// Example: opts.set_option("store_output", "encoder_output");
77+
//
78+
// "use_stored_input" (string): For inputs matching the stored tensor's size,
79+
// use D2D copy from the stored tensor instead of H2D copy from CPU.
80+
// This setting persists across execute() calls until reset.
81+
// Example: opts.set_option("use_stored_input", "encoder_output");
82+
//
83+
// "reset_stored_input" (bool): Clear the use_stored_input setting.
84+
// Does NOT delete the stored tensor - only stops using it for D2D.
85+
// Example: opts.set_option("reset_stored_input", true);
86+
//
87+
// "clear_stored_tensor" (string): Delete the named tensor from storage,
88+
// freeing GPU memory (no-op if no tensor is stored under that name). Use after decoder loop completes.
89+
// Example: opts.set_option("clear_stored_tensor", "encoder_output");
90+
//
91+
// TYPICAL USAGE PATTERN (encoder-decoder model):
92+
//
93+
// 1. Before encoder: set_option("store_output", "encoder_output")
94+
// 2. Execute encoder (output is stored on GPU)
95+
// 3. Before decoder loop: set_option("use_stored_input", "encoder_output")
96+
// 4. Execute decoder N times (the encoder-output input is filled via D2D copy)
97+
// 5. After decoder loop:
98+
// set_option("reset_stored_input", true)
99+
// set_option("clear_stored_tensor", "encoder_output")
100+
//
101+
// ============================================================================
102+
62103
// Storage control options (set via set_option before execute)
63104
mutable std::string
64105
store_output_name_; // Name to store output under (empty = none)
@@ -171,6 +212,25 @@ class ET_EXPERIMENTAL CudaBackend final
171212
return Error::InvalidArgument;
172213
}
173214
}
215+
// Handle clear_stored_tensor: expects a string name
216+
// Deletes the named GPU tensor from storage, freeing GPU memory.
217+
else if (strcmp(option.key, "clear_stored_tensor") == 0) {
218+
if (auto* arr = std::get_if<
219+
std::array<char, executorch::runtime::kMaxOptionValueLength>>(
220+
&option.value)) {
221+
std::string name(arr->data());
222+
auto it = gpu_tensors_.find(name);
223+
if (it != gpu_tensors_.end()) {
224+
if (it->second.handle != nullptr) {
225+
aoti_torch_delete_tensor_object(it->second.handle);
226+
}
227+
gpu_tensors_.erase(it);
228+
}
229+
} else {
230+
ET_LOG(Warning, "clear_stored_tensor option expects a string value");
231+
return Error::InvalidArgument;
232+
}
233+
}
174234
}
175235
return Error::Ok;
176236
}

extension/asr/runner/runner.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -333,16 +333,15 @@ Result<std::vector<int64_t>> AsrRunner::transcribe(
333333
}
334334
}
335335

336-
// Reset stored input settings after decoder loop completes.
337-
// This disables the D2D copy optimization for subsequent execute() calls.
338-
// Note: The stored GPU tensor remains in memory until the next encoder run
339-
// (which overwrites it) or until the backend is destroyed.
336+
// Reset stored input settings and free GPU memory after decoder loop completes.
337+
// This disables the D2D copy optimization and releases the stored encoder output.
340338
{
341-
::executorch::runtime::BackendOptions<1> opts;
339+
::executorch::runtime::BackendOptions<2> opts;
342340
opts.set_option("reset_stored_input", true);
341+
opts.set_option("clear_stored_tensor", "encoder_output");
343342
auto err = ::executorch::runtime::set_option("CudaBackend", opts.view());
344343
if (err != ::executorch::runtime::Error::Ok) {
345-
ET_LOG(Warning, "Failed to set reset_stored_input option");
344+
ET_LOG(Warning, "Failed to reset stored input settings");
346345
}
347346
}
348347

0 commit comments

Comments
 (0)