@@ -119,6 +119,21 @@ class ET_EXPERIMENTAL MultimodalRunner {
119119 std::function<void (const std::string&)> token_callback = {},
120120 std::function<void (const Stats&)> stats_callback = {});
121121
122+ /**
123+ * Rvalue-reference overload of generate(): produces tokens from multimodal
124+ * inputs handed over as a temporary or std::move()'d vector.
125+ * @param inputs Vector of MultimodalInput objects; may be moved from, so do not reuse it after the call.
126+ * @param config Generation configuration parameters.
127+ * @param token_callback Called for each generated token; defaults to an empty std::function.
128+ * @param stats_callback Called with generation statistics; defaults to an empty std::function.
129+ * @return The error code. KV cache position is tracked internally in pos_.
130+ */
131+ virtual ::executorch::runtime::Error generate (
132+ std::vector<MultimodalInput>&& inputs,
133+ const GenerationConfig& config,
134+ std::function<void (const std::string&)> token_callback = {},
135+ std::function<void (const Stats&)> stats_callback = {});
136+
122137 /* *
123138 * Prefill multimodal inputs, for example to reload chat history.
124139 * @param inputs A vector of MultimodalInput objects containing images and
@@ -128,6 +143,15 @@ class ET_EXPERIMENTAL MultimodalRunner {
128143 virtual ::executorch::runtime::Error prefill (
129144 const std::vector<MultimodalInput>& inputs);
130145
146+ /**
147+ * Rvalue-reference overload of prefill(): accepts multimodal inputs handed
148+ * over as a temporary or std::move()'d vector.
149+ * @param inputs Vector of MultimodalInput objects; may be moved from, so do not reuse it after the call.
150+ * @return The error code. KV cache position is tracked internally in pos_.
151+ */
152+ virtual ::executorch::runtime::Error prefill (
153+ std::vector<MultimodalInput>&& inputs);
154+
131155 inline void stop () { // Forwards a stop request to the text token generator.
132156 text_token_generator_->stop (); // NOTE(review): dereferences unconditionally; assumes text_token_generator_ is non-null here — TODO confirm
133157 }
0 commit comments