Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion examples/models/llama/runner/runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <string>
#include <unordered_map>

#include <executorch/extension/llm/runner/irunner.h>
#include <executorch/extension/llm/runner/stats.h>
#include <executorch/extension/llm/runner/text_decoder_runner.h>
#include <executorch/extension/llm/runner/text_prefiller.h>
Expand All @@ -26,7 +27,7 @@

namespace example {

class ET_EXPERIMENTAL Runner {
class ET_EXPERIMENTAL Runner : public executorch::extension::llm::IRunner {
public:
explicit Runner(
const std::string& model_path,
Expand Down
50 changes: 50 additions & 0 deletions extension/llm/runner/irunner.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// An interface for LLM runners. Developers can create their own runner that
// implements their own load and generation logic to run the model.

#pragma once

#include <functional>
#include <string>

#include <executorch/extension/llm/runner/stats.h>
#include <executorch/extension/module/module.h>

namespace executorch {
namespace extension {
namespace llm {

class ET_EXPERIMENTAL IRunner {
public:
virtual ~IRunner() = default;

// Checks if the model is loaded.
//
// Pure virtual: this interface supplies no default, so every concrete runner
// must provide its own definition of "loaded". Declared const, so it can be
// called on a const runner.
virtual bool is_loaded() const = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can have a default implementation, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe not.

// Reports whether every piece the runner depends on is available: the module
// must report itself loaded, and each helper member must test as true.
// Checks run in the same order as a single && chain and stop at the first
// failure.
bool Runner::is_loaded() const {
  if (!module_->is_loaded()) {
    return false;
  }
  if (!tokenizer_) {
    return false;
  }
  if (!text_decoder_runner_) {
    return false;
  }
  if (!text_prefiller_) {
    return false;
  }
  return static_cast<bool>(text_token_generator_);
}

It's quite an implementation detail.


// Load the model and tokenizer.
//
// Pure virtual: each concrete runner implements its own loading logic.
// Returns an ::executorch::runtime::Error describing the outcome.
// NOTE(review): the specific error codes are not visible in this header;
// check the Error definition before relying on particular values.
virtual ::executorch::runtime::Error load() = 0;

// Generate the output tokens.
//
// Pure virtual: each concrete runner implements its own generation logic.
//
// Parameters:
//   prompt         - text passed to the runner by const reference
//                    (presumably the input to generate from).
//   seq_len        - 32-bit sequence length setting. NOTE(review): whether
//                    this counts prompt tokens, new tokens, or both is not
//                    stated here; confirm against the implementation.
//   token_callback - optional; defaults to an empty std::function. Takes a
//                    const std::string& (presumably each piece of generated
//                    text; confirm).
//   stats_callback - optional; defaults to an empty std::function. Takes a
//                    const Stats&.
//   echo           - defaults to true (presumably controls whether the prompt
//                    is echoed in the output; confirm).
//   warming        - defaults to false (presumably marks a warm-up run;
//                    confirm).
//
// Returns an ::executorch::runtime::Error describing the outcome.
//
// Caution: default arguments of a virtual function are selected from the
// static type used at the call site, not from the overriding class. Overrides
// should repeat exactly these defaults so that calls through IRunner and
// calls through the concrete type behave the same.
virtual ::executorch::runtime::Error generate(
const std::string& prompt,
int32_t seq_len,
std::function<void(const std::string&)> token_callback = {},
std::function<void(const ::executorch::extension::llm::Stats&)>
stats_callback = {},
bool echo = true,
bool warming = false) = 0;

// Stop the generation.
//
// Pure virtual with no return value, so the interface gives callers no
// success/failure signal. NOTE(review): whether this may be called from a
// different thread than generate() is not stated here; confirm with the
// concrete runner.
virtual void stop() = 0;
};

} // namespace llm
} // namespace extension
} // namespace executorch
Loading