4 files changed: +10 -10 lines changed
examples/models/llama/runner
@@ -129,17 +129,17 @@ Error Runner::load() {
129129 temperature_);
130130 text_prefiller_ = std::make_unique<llm::TextPrefiller>(
131131 text_decoder_runner_.get (),
132- metadata_.at (kUseInt32Token ),
133132 metadata_.at (kUseKVCache ),
134- metadata_.at (kEnableDynamicShape ));
133+ metadata_.at (kEnableDynamicShape ),
134+ metadata_.at (kUseInt32Token ));
135135
136136 text_token_generator_ = std::make_unique<llm::TextTokenGenerator>(
137137 tokenizer_.get (),
138138 text_decoder_runner_.get (),
139- metadata_.at (kUseInt32Token ),
140139 metadata_.at (kUseKVCache ),
141140 std::move (eos_ids),
142- &stats_);
141+ &stats_,
142+ metadata_.at (kUseInt32Token ));
143143
144144 return Error::Ok;
145145}
@@ -17,9 +17,9 @@ namespace llm {
1717
1818TextPrefiller::TextPrefiller (
1919 TextDecoderRunner* text_decoder_runner,
20- bool use_int32_token,
2120 bool use_kv_cache,
22- bool enable_parallel_prefill)
21+ bool enable_parallel_prefill,
22+ bool use_int32_token)
2323 : text_decoder_runner_(text_decoder_runner),
2424 use_int32_token_ (use_int32_token),
2525 use_kv_cache_(use_kv_cache),
@@ -24,9 +24,9 @@ class ET_EXPERIMENTAL TextPrefiller {
2424 public:
2525 TextPrefiller (
2626 TextDecoderRunner* text_decoder_runner,
27- bool use_int32_token,
2827 bool use_kv_cache_,
29- bool enable_parallel_prefill);
28+ bool enable_parallel_prefill,
29+ bool use_int32_token = false );
3030 /* *
3131 * Prefill an LLM Module with the given text input.
3232 * @param prompt_tokens The text prompt tokens to the LLM Module. Encoded by
@@ -23,10 +23,10 @@ class ET_EXPERIMENTAL TextTokenGenerator {
2323 TextTokenGenerator (
2424 Tokenizer* tokenizer,
2525 TextDecoderRunner* text_decoder_runner,
26- bool use_int32_token,
2726 bool use_kv_cache,
2827 std::unique_ptr<std::unordered_set<uint64_t >>&& eos_ids,
29- Stats* stats)
28+ Stats* stats,
29+ bool use_int32_token = false )
3030 : tokenizer_(tokenizer),
3131 text_decoder_runner_ (text_decoder_runner),
3232 eos_ids_(std::move(eos_ids)),
You can’t perform that action at this time.
0 commit comments