Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/models/llama2/runner/runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

namespace example {

class Runner {
class ET_EXPERIMENTAL Runner {
public:
explicit Runner(
const std::string& model_path,
Expand Down
2 changes: 1 addition & 1 deletion examples/models/llava/runner/llava_image_prefiller.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

namespace example {

class LlavaImagePrefiller
class ET_EXPERIMENTAL LlavaImagePrefiller
: public ::executorch::extension::llm::ImagePrefiller {
public:
LlavaImagePrefiller(::executorch::extension::Module* module)
Expand Down
3 changes: 2 additions & 1 deletion examples/models/llava/runner/llava_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

namespace example {

class LlavaRunner : public ::executorch::extension::llm::MultimodalRunner {
class ET_EXPERIMENTAL LlavaRunner
: public ::executorch::extension::llm::MultimodalRunner {
public:
explicit LlavaRunner(
const std::string& model_path,
Expand Down
2 changes: 1 addition & 1 deletion examples/models/llava/runner/llava_text_decoder_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

namespace example {

class LlavaTextDecoderRunner
class ET_EXPERIMENTAL LlavaTextDecoderRunner
: public executorch::extension::llm::TextDecoderRunner {
public:
LlavaTextDecoderRunner(
Expand Down
3 changes: 2 additions & 1 deletion extension/llm/runner/image.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
#pragma once
#include <cstdint>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <executorch/runtime/platform/compiler.h>
#include <vector>

namespace executorch {
namespace extension {
namespace llm {

struct Image {
struct ET_EXPERIMENTAL Image {
// Assuming NCHW format
std::vector<uint8_t> data;
int32_t width;
Expand Down
3 changes: 2 additions & 1 deletion extension/llm/runner/image_prefiller.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@

#include <executorch/extension/llm/runner/image.h>
#include <executorch/extension/module/module.h>
#include <executorch/runtime/platform/compiler.h>

namespace executorch {
namespace extension {
namespace llm {

// Assuming kv cache and parallel prefill are enabled.
class ImagePrefiller {
class ET_EXPERIMENTAL ImagePrefiller {
public:
explicit ImagePrefiller(::executorch::extension::Module* module)
: module_(module) {}
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/runner/multimodal_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace executorch {
namespace extension {
namespace llm {

class MultimodalRunner {
class ET_EXPERIMENTAL MultimodalRunner {
public:
explicit MultimodalRunner(
const std::string& model_path,
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/runner/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace executorch {
namespace extension {
namespace llm {

struct Stats {
struct ET_EXPERIMENTAL Stats {
// Scaling factor for timestamps - in this case, we use ms.
const long SCALING_FACTOR_UNITS_PER_SECOND = 1000;
// Time stamps for the different stages of the execution
Expand Down
3 changes: 2 additions & 1 deletion extension/llm/runner/text_decoder_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
#include <executorch/extension/llm/sampler/sampler.h>
#include <executorch/extension/module/module.h>
#include <executorch/extension/tensor/tensor.h>
#include <executorch/runtime/platform/compiler.h>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <functional>

namespace executorch {
namespace extension {
namespace llm {

class TextDecoderRunner {
class ET_EXPERIMENTAL TextDecoderRunner {
public:
TextDecoderRunner(
Module* module,
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/runner/text_prefiller.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace executorch {
namespace extension {
namespace llm {

class TextPrefiller {
class ET_EXPERIMENTAL TextPrefiller {
public:
TextPrefiller(
TextDecoderRunner* text_decoder_runner,
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/runner/text_token_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace executorch {
namespace extension {
namespace llm {

class TextTokenGenerator {
class ET_EXPERIMENTAL TextTokenGenerator {
public:
TextTokenGenerator(
Tokenizer* tokenizer,
Expand Down
7 changes: 4 additions & 3 deletions extension/llm/runner/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#pragma once
#include <executorch/runtime/platform/compiler.h>
#include <stdio.h>
#include <time.h>
#include <cctype>
Expand All @@ -18,7 +19,7 @@ namespace executorch {
namespace extension {
namespace llm {

void inline safe_printf(const char* piece) {
ET_EXPERIMENTAL void inline safe_printf(const char* piece) {
// piece might be a raw byte token, and we only want to print printable chars
// or whitespace because some of the other bytes can be various control codes,
// backspace, etc.
Expand All @@ -40,7 +41,7 @@ void inline safe_printf(const char* piece) {
// ----------------------------------------------------------------------------
// utilities: time

long inline time_in_ms() {
ET_EXPERIMENTAL long inline time_in_ms() {
// return time in milliseconds, for benchmarking the model speed
struct timespec time;
clock_gettime(CLOCK_REALTIME, &time);
Expand All @@ -54,7 +55,7 @@ long inline time_in_ms() {
// RSS: Resident Set Size, the amount of memory currently in the RAM for this
// process. These values are approximate, and are only used for logging
// purposes.
size_t inline get_rss_bytes() {
ET_EXPERIMENTAL size_t inline get_rss_bytes() {
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
struct rusage r_usage;
if (getrusage(RUSAGE_SELF, &r_usage) == 0) {
Expand Down
5 changes: 3 additions & 2 deletions extension/llm/sampler/sampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,20 @@
#endif

#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/platform/compiler.h>

namespace executorch {
namespace extension {
namespace llm {
// A simple llama2 sampler.

template <typename T>
struct ProbIndex {
struct ET_EXPERIMENTAL ProbIndex {
T prob;
int32_t index;
}; // struct used when sorting probabilities during top-p sampling

class Sampler {
class ET_EXPERIMENTAL Sampler {
public:
Sampler(
int32_t vocab_size,
Expand Down
1 change: 1 addition & 0 deletions extension/llm/sampler/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ def define_common_targets():
] if aten else [],
exported_deps = [
"//executorch/runtime/core/exec_aten:lib" + aten_suffix,
"//executorch/runtime/platform:compiler",
],
)
2 changes: 1 addition & 1 deletion extension/llm/tokenizer/bpe_tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct TokenIndex {

// A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code
// won't work with this class, it needs to go through tokenizer.py first.
class BPETokenizer : public Tokenizer {
class ET_EXPERIMENTAL BPETokenizer : public Tokenizer {
public:
explicit BPETokenizer();
~BPETokenizer() override;
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/tokenizer/tiktoken.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ using Encoder = std::unordered_map<std::string, uint64_t>;
using Decoder = std::unordered_map<uint64_t, std::string>;
using Re2UPtr = std::unique_ptr<re2::RE2>;

class Tiktoken : public Tokenizer {
class ET_EXPERIMENTAL Tiktoken : public Tokenizer {
public:
/**
* @param[in] special_tokens List of special tokens including bos, eos;
Expand Down
3 changes: 2 additions & 1 deletion extension/llm/tokenizer/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/compiler.h>

namespace executorch {
namespace extension {
namespace llm {

// A tokenizer interface.
class Tokenizer {
class ET_EXPERIMENTAL Tokenizer {
public:
explicit Tokenizer() {}
virtual ~Tokenizer() {}
Expand Down
Loading