// processing logic.
#pragma once

+#include <executorch/examples/models/llava/runner/llava_image_prefiller.h>
+#include <executorch/extension/llm/runner/image.h>
+#include <executorch/extension/llm/runner/io_manager/io_manager.h>
+#include <executorch/extension/llm/runner/irunner.h>
+#include <executorch/extension/llm/runner/stats.h>
+#include <executorch/extension/llm/runner/text_decoder_runner.h>
+#include <executorch/extension/llm/runner/text_prefiller.h>
+#include <executorch/extension/llm/runner/text_token_generator.h>
+#include <executorch/extension/module/module.h>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
-#include <type_traits>
-#include <unordered_map>
-
-#include <executorch/extension/llm/runner/multimodal_runner.h>

namespace example {

-class ET_EXPERIMENTAL LlavaRunner
-    : public ::executorch::extension::llm::MultimodalRunner {
+using executorch::extension::Module;
+using executorch::extension::llm::ImagePrefiller;
+using executorch::extension::llm::IOManager;
+using executorch::extension::llm::Stats;
+using executorch::extension::llm::TextDecoderRunner;
+using executorch::extension::llm::TextPrefiller;
+using executorch::extension::llm::TextTokenGenerator;
+
+class ET_EXPERIMENTAL LlavaRunner {
 public:
  explicit LlavaRunner(
      const std::string& model_path,
      const std::string& tokenizer_path,
      const float temperature = 0.8f)
-      : MultimodalRunner(model_path, tokenizer_path, temperature){};
+      : temperature_(temperature),
+        module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
+        io_manager_(std::make_unique<IOManager>()),
+        tokenizer_path_(tokenizer_path) {
+    ET_LOG(
+        Info,
+        "Creating Llava runner: model_path=%s, tokenizer_path=%s",
+        model_path.c_str(),
+        tokenizer_path.c_str());
+  }

-  bool is_loaded() override;
+  bool is_loaded();

-  ::executorch::runtime::Error load() override;
+  ::executorch::runtime::Error load();

  ::executorch::runtime::Error generate(
      std::vector<::executorch::extension::llm::Image> images,
@@ -41,17 +62,17 @@ class ET_EXPERIMENTAL LlavaRunner
      std::function<void(const std::string&)> token_callback = {},
      std::function<void(const ::executorch::extension::llm::Stats&)>
          stats_callback = {},
-      bool echo = true) override;
+      bool echo = true);

  ::executorch::runtime::Error prefill_images(
      std::vector<::executorch::extension::llm::Image>& images,
-      int64_t& start_pos) override;
+      int64_t& start_pos);

  ::executorch::runtime::Result<uint64_t> prefill_prompt(
      const std::string& prompt,
      int64_t& start_pos,
      int8_t bos = 0,
-      int8_t eos = 0) override;
+      int8_t eos = 0);

  ::executorch::runtime::Error generate_from_pos(
      const std::string& prompt,
@@ -60,9 +81,30 @@ class ET_EXPERIMENTAL LlavaRunner
      std::function<void(const std::string&)> token_callback = {},
      std::function<void(const ::executorch::extension::llm::Stats&)>
          stats_callback = {},
-      bool echo = true) override;
+      bool echo = true);
+
+  inline void stop() {
+    text_token_generator_->stop();
+  }

 private:
+  // metadata
+  float temperature_;
+
+  // model
+  std::unordered_set<std::string> model_methods_;
+  std::unique_ptr<Module> module_;
+  std::unique_ptr<TextDecoderRunner> text_decoder_runner_;
+  std::unique_ptr<TextPrefiller> text_prefiller_;
+  std::unique_ptr<LlavaImagePrefiller> image_prefiller_;
+  std::unique_ptr<IOManager> io_manager_;
+  std::unique_ptr<TextTokenGenerator> text_token_generator_;
+  std::string tokenizer_path_;
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer_;
+
+  // stats
+  Stats stats_;
+
  inline static const char* kPresetPrompt =
      "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: ";
};
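
For context, a minimal usage sketch of the refactored runner (not part of the diff): it only exercises the constructor, load(), and prefill_prompt() as declared in this header, since the full generate() parameter list is elided by the hunk markers. The include path and file paths are assumptions, not taken from the change itself.

// Hypothetical usage sketch; paths below are placeholders.
#include <executorch/examples/models/llava/runner/llava_runner.h> // assumed header location

#include <cstdint>

int main() {
  // Construct with the default temperature (0.8f per the header above).
  example::LlavaRunner runner(
      "/path/to/llava.pte", // placeholder model_path
      "/path/to/tokenizer.bin"); // placeholder tokenizer_path

  // Loads the Module, tokenizer, and the prefiller/generator members the class now owns.
  if (runner.load() != ::executorch::runtime::Error::Ok) {
    return 1;
  }

  // Prefill a text prompt starting at position 0; bos=1 prepends the BOS token.
  int64_t start_pos = 0;
  auto token = runner.prefill_prompt("USER: what is in the image?", start_pos, /*bos=*/1);
  return token.ok() ? 0 : 1;
}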