pytorch · larryliu0820 · Oct 10, 2024
@@ -26,7 +26,7 @@
 
 namespace example {
 
-class Runner {
+class ET_EXPERIMENTAL Runner {
  public:
   explicit Runner(
       const std::string& model_path,

@@ -15,7 +15,7 @@
 
 namespace example {
 
-class LlavaImagePrefiller
+class ET_EXPERIMENTAL LlavaImagePrefiller
     : public ::executorch::extension::llm::ImagePrefiller {
  public:
   LlavaImagePrefiller(::executorch::extension::Module* module)

@@ -21,7 +21,8 @@
 
 namespace example {
 
-class LlavaRunner : public ::executorch::extension::llm::MultimodalRunner {
+class ET_EXPERIMENTAL LlavaRunner
+    : public ::executorch::extension::llm::MultimodalRunner {
  public:
   explicit LlavaRunner(
       const std::string& model_path,

@@ -14,7 +14,7 @@
 
 namespace example {
 
-class LlavaTextDecoderRunner
+class ET_EXPERIMENTAL LlavaTextDecoderRunner
     : public executorch::extension::llm::TextDecoderRunner {
  public:
   LlavaTextDecoderRunner(

@@ -11,13 +11,14 @@
 #pragma once
 #include <cstdint>
+#include <executorch/runtime/platform/compiler.h>
 // patternlint-disable-next-line executorch-cpp-nostdinc
 #include <vector>
 
 namespace executorch {
 namespace extension {
 namespace llm {
 
-struct Image {
+struct ET_EXPERIMENTAL Image {
   // Assuming NCHW format
   std::vector<uint8_t> data;
   int32_t width;

@@ -12,13 +12,14 @@
 
 #include <executorch/extension/llm/runner/image.h>
 #include <executorch/extension/module/module.h>
+#include <executorch/runtime/platform/compiler.h>
 
 namespace executorch {
 namespace extension {
 namespace llm {
 
 // Assuming kv cache and parallel prefill are enabled.
-class ImagePrefiller {
+class ET_EXPERIMENTAL ImagePrefiller {
  public:
   explicit ImagePrefiller(::executorch::extension::Module* module)
       : module_(module) {}

@@ -36,7 +36,7 @@ namespace executorch {
 namespace extension {
 namespace llm {
 
-class MultimodalRunner {
+class ET_EXPERIMENTAL MultimodalRunner {
  public:
   explicit MultimodalRunner(
       const std::string& model_path,

@@ -19,7 +19,7 @@ namespace executorch {
 namespace extension {
 namespace llm {
 
-struct Stats {
+struct ET_EXPERIMENTAL Stats {
   // Scaling factor for timestamps - in this case, we use ms.
   const long SCALING_FACTOR_UNITS_PER_SECOND = 1000;
   // Time stamps for the different stages of the execution

@@ -13,14 +13,15 @@
 #include <executorch/extension/llm/sampler/sampler.h>
 #include <executorch/extension/module/module.h>
 #include <executorch/extension/tensor/tensor.h>
+#include <executorch/runtime/platform/compiler.h>
 // patternlint-disable-next-line executorch-cpp-nostdinc
 #include <functional>
 
 namespace executorch {
 namespace extension {
 namespace llm {
 
-class TextDecoderRunner {
+class ET_EXPERIMENTAL TextDecoderRunner {
  public:
   TextDecoderRunner(
       Module* module,

@@ -20,7 +20,7 @@ namespace executorch {
 namespace extension {
 namespace llm {
 
-class TextPrefiller {
+class ET_EXPERIMENTAL TextPrefiller {
  public:
   TextPrefiller(
       TextDecoderRunner* text_decoder_runner,

@@ -18,7 +18,7 @@ namespace executorch {
 namespace extension {
 namespace llm {
 
-class TextTokenGenerator {
+class ET_EXPERIMENTAL TextTokenGenerator {
  public:
   TextTokenGenerator(
       Tokenizer* tokenizer,

@@ -7,6 +7,7 @@
  */
 
 #pragma once
+#include <executorch/runtime/platform/compiler.h>
 #include <stdio.h>
 #include <time.h>
 #include <cctype>
@@ -18,7 +19,7 @@ namespace executorch {
 namespace extension {
 namespace llm {
 
-void inline safe_printf(const char* piece) {
+ET_EXPERIMENTAL void inline safe_printf(const char* piece) {
   // piece might be a raw byte token, and we only want to print printable chars
   // or whitespace because some of the other bytes can be various control codes,
   // backspace, etc.
@@ -40,7 +41,7 @@ void inline safe_printf(const char* piece) {
 // ----------------------------------------------------------------------------
 // utilities: time
 
-long inline time_in_ms() {
+ET_EXPERIMENTAL long inline time_in_ms() {
   // return time in milliseconds, for benchmarking the model speed
   struct timespec time;
   clock_gettime(CLOCK_REALTIME, &time);
@@ -54,7 +55,7 @@ long inline time_in_ms() {
 // RSS: Resident Set Size, the amount of memory currently in the RAM for this
 // process. These values are approximate, and are only used for logging
 // purposes.
-size_t inline get_rss_bytes() {
+ET_EXPERIMENTAL size_t inline get_rss_bytes() {
 #if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
   struct rusage r_usage;
   if (getrusage(RUSAGE_SELF, &r_usage) == 0) {

@@ -19,19 +19,20 @@
 #endif
 
 #include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/platform/compiler.h>
 
 namespace executorch {
 namespace extension {
 namespace llm {
 // A simple llama2 sampler.
 
 template <typename T>
-struct ProbIndex {
+struct ET_EXPERIMENTAL ProbIndex {
   T prob;
   int32_t index;
 }; // struct used when sorting probabilities during top-p sampling
 
-class Sampler {
+class ET_EXPERIMENTAL Sampler {
  public:
   Sampler(
       int32_t vocab_size,

@@ -23,5 +23,6 @@ def define_common_targets():
             ] if aten else [],
             exported_deps = [
                 "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
+                "//executorch/runtime/platform:compiler",
             ],
         )
@@ -22,7 +22,7 @@ struct TokenIndex {
 
 // A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code
 // won't work with this class, it needs to go through tokenizer.py first.
-class BPETokenizer : public Tokenizer {
+class ET_EXPERIMENTAL BPETokenizer : public Tokenizer {
  public:
   explicit BPETokenizer();
   ~BPETokenizer() override;

@@ -22,7 +22,7 @@ using Encoder = std::unordered_map<std::string, uint64_t>;
 using Decoder = std::unordered_map<uint64_t, std::string>;
 using Re2UPtr = std::unique_ptr<re2::RE2>;
 
-class Tiktoken : public Tokenizer {
+class ET_EXPERIMENTAL Tiktoken : public Tokenizer {
  public:
   /**
    * @param[in] special_tokens List of special tokens including bos, eos;

@@ -16,13 +16,14 @@
 
 #include <executorch/runtime/core/error.h>
 #include <executorch/runtime/core/result.h>
+#include <executorch/runtime/platform/compiler.h>
 
 namespace executorch {
 namespace extension {
 namespace llm {
 
 // A tokenizer interface.
-class Tokenizer {
+class ET_EXPERIMENTAL Tokenizer {
  public:
   explicit Tokenizer() {}
   virtual ~Tokenizer() {}