diff --git a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h index cf500d7e431..21f03d5aefc 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h +++ b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h @@ -51,9 +51,9 @@ class LhdTokenGenerator : public TokenGenerator { metadata.use_int64_token}, stats), metadata_(metadata), - ngrams_pool_(metadata.vocab_size, metadata.ngram, metadata.gcap), lhd_branch_(metadata.ngram - 1, std::vector(metadata.window)), - lhd_branch_prev_(metadata.window) { + lhd_branch_prev_(metadata.window), + ngrams_pool_(metadata.vocab_size, metadata.ngram, metadata.gcap) { ET_LOG( Info, "Use Lookahead decoding: ngram=%d, window=%d, gcap=%d", diff --git a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp index da20517925b..8939347a062 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp @@ -27,8 +27,8 @@ TokenGenerator::TokenGenerator( kv_manager_(kv_manager), method_name_(method_name), eos_ids_(std::move(eos_ids)), - metadata_(metadata), - stats_(stats) { + stats_(stats), + metadata_(metadata) { k_cache_in_.resize(metadata_.num_layers); v_cache_in_.resize(metadata_.num_layers); k_cache_out_.resize(metadata_.num_layers);