pytorch · kirklandsign · Oct 18, 2024 · Oct 18, 2024 · Oct 18, 2024 · Oct 18, 2024
@@ -17,6 +17,7 @@
 #include <string>
 #include <unordered_map>
 
+#include <executorch/extension/llm/runner/irunner.h>
 #include <executorch/extension/llm/runner/stats.h>
 #include <executorch/extension/llm/runner/text_decoder_runner.h>
 #include <executorch/extension/llm/runner/text_prefiller.h>
@@ -26,7 +27,7 @@
 
 namespace example {
 
-class ET_EXPERIMENTAL Runner {
+class ET_EXPERIMENTAL Runner : public executorch::extension::llm::IRunner {
  public:
   explicit Runner(
       const std::string& model_path,

@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// An interface for LLM runners. Developers can create their own runner that
+// implements their own load and generation logic to run the model.
+
+#pragma once
+
+#include <functional>
+#include <string>
+
+#include <executorch/extension/llm/runner/stats.h>
+#include <executorch/extension/module/module.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+class ET_EXPERIMENTAL IRunner { // abstract interface: every IRunner method declared here is pure virtual
+ public:
+  virtual ~IRunner() = default; // virtual so an implementation can be destroyed through an IRunner pointer
+
+  // Checks if the model is loaded. Declared const; what counts as "loaded" is left to the implementation.
+  virtual bool is_loaded() const = 0;
 bool Runner::is_loaded() const { // NOTE(review): Runner:: definition inside IRunner's body; looks like pasted context from the concrete runner - confirm
   return module_->is_loaded() && tokenizer_ && text_decoder_runner_ && 
       text_prefiller_ && text_token_generator_; 
 } 
 bool Runner::is_loaded() const { // NOTE(review): identical to the definition just above; likely a duplicate - confirm
   return module_->is_loaded() && tokenizer_ && text_decoder_runner_ && 
       text_prefiller_ && text_token_generator_; 
 } 
+
+  // Load the model and tokenizer. Returns a runtime Error; presumably Error::Ok on success (TODO confirm).
+  virtual ::executorch::runtime::Error load() = 0;
+
+  // Generate the output tokens. Default arguments bind by static type at the call site, so keep overrides in sync.
+  virtual ::executorch::runtime::Error generate(
+      const std::string& prompt, // prompt text to generate from
+      int32_t seq_len, // NOTE(review): presumably a sequence-length limit in tokens - confirm
+      std::function<void(const std::string&)> token_callback = {}, // optional; defaults to an empty std::function
+      std::function<void(const ::executorch::extension::llm::Stats&)>
+          stats_callback = {}, // optional; defaults to empty; receives a const Stats reference
+      bool echo = true, // defaults to true; NOTE(review): presumably echoes the prompt in the output - confirm
+      bool warming = false) = 0; // defaults to false; NOTE(review): presumably marks a warm-up run - confirm
+
+  // Stop the generation. Returns void, so callers get no status back.
+  virtual void stop() = 0;
+};
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch