Skip to content

Commit 763a474

Browse files
Improve warnings for qnn runner (#15716)
1 parent cf0e9aa commit 763a474

File tree

2 files changed

+40
-1
lines changed

2 files changed

+40
-1
lines changed

examples/qualcomm/oss_scripts/llama/runner/runner.cpp

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -378,7 +378,22 @@ Error Runner<T>::generate_from_prompt_or_file(
378378
stats_.inference_start_ms = time_in_ms();
379379

380380
int32_t seq_len = config.seq_len;
381-
seq_len = (seq_len > 0 && seq_len <= context_len_) ? seq_len : context_len_;
381+
if (seq_len > context_len_) {
382+
ET_LOG(
383+
Info,
384+
"Warning: Requested seq_len (%d) exceeds compiled max_seq_len (%d). Clamping to %d.",
385+
seq_len,
386+
context_len_,
387+
context_len_);
388+
seq_len = context_len_;
389+
} else if (seq_len <= 0) {
390+
ET_LOG(
391+
Info,
392+
"Warning: Invalid seq_len (%d). Using compiled max_seq_len (%d).",
393+
seq_len,
394+
context_len_);
395+
seq_len = context_len_;
396+
}
382397
int32_t n_bos = (cur_pos_ == 0) ? 1 : 0;
383398

384399
// encode the (string) prompt into tokens sequence

examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -323,6 +323,30 @@ Result<int64_t> TokenGenerator<T>::generate(
323323
break;
324324
}
325325
}
326+
327+
// Check if generation was truncated due to seq_len limit (no EOS token)
328+
if (eos_ids_->count(cur_token) == 0 && pos >= seq_len - 1) {
329+
printf("\n");
330+
ET_LOG(
331+
Info,
332+
"Warning: Generation stopped at seq_len limit (%d) without reaching EOS token. Response may be incomplete.",
333+
seq_len);
334+
if (seq_len >= metadata_.context_len) {
335+
ET_LOG(
336+
Info,
337+
"- seq_len (%d) already equals compiled max_seq_len (%d). Consider recompiling with larger --max_seq_len.",
338+
seq_len,
339+
metadata_.context_len);
340+
} else {
341+
ET_LOG(
342+
Info,
343+
"- seq_len (%d) is less than compiled max_seq_len (%d). Consider increasing --seq_len (up to %d).",
344+
seq_len,
345+
metadata_.context_len,
346+
metadata_.context_len);
347+
}
348+
}
349+
326350
return pos - start_pos;
327351
}
328352
// Explicit instantiations

0 commit comments

Comments
 (0)