File tree (expand / collapse): 1 file changed, +4 -2 lines changed
examples/models/llama/runner (expand / collapse file tree): 1 file changed, +4 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ static constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
31 31   static constexpr auto kBosId = "get_bos_id";
32 32   static constexpr auto kEosIds = "get_eos_ids";
33 33   static constexpr auto kMaxSeqLen = "get_max_seq_len";
   34 + static constexpr auto kMaxContextLen = "get_max_context_len";
34 35   static constexpr auto kVocabSize = "get_vocab_size";
35 36   static constexpr auto kUseKVCache = "use_kv_cache";
36 37   static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
@@ -49,6 +50,7 @@ Runner::Runner(
49 50         metadata_({
50 51             {kEnableDynamicShape, false},
51 52             {kMaxSeqLen, 128},
   53 +           {kMaxContextLen, 128},
52 54             {kUseKVCache, true},
53 55             {kUseSDPAWithKVCache, false},
54 56         }) {
@@ -201,9 +203,9 @@ Error Runner::generate(
201 203     shouldStop_ = false;
202 204
203 205     // Set the sequence length to the max seq length if not provided
204     -   seq_len = (seq_len > 0 && seq_len <= metadata_.at(kMaxSeqLen))
    206 +   seq_len = (seq_len > 0 && seq_len <= metadata_.at(kMaxContextLen))
205 207         ? seq_len
206     -       : metadata_.at(kMaxSeqLen);
    208 +       : metadata_.at(kMaxContextLen);
207 209
208 210     ::tokenizers::Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
209 211         prompt,
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments