commit 6997fad (1 parent: a5f19bb)
src/llama-model.cpp
@@ -12216,7 +12216,7 @@ struct llm_build_olmo2 : public llm_graph_context {
         const bool is_swa = hparams.is_swa(il);

         if (is_swa) {
-            // For sliding window layers, Olmo3 does not use rope scaling.
+            // For sliding window layers, Olmo3 uses regular RoPE with no YaRN rope scaling.
             // This is achieved here by setting freq_scale and attn_factor to 1.
             // We also set ext_factor to 0 to avoid a few unnecessary computations.
             Qcur = ggml_rope_ext(
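
For context, below is a minimal sketch of how a per-layer branch like this can neutralize the YaRN parameters before calling ggml_rope_ext. The ggml_rope_ext call that the diff touches is truncated above, so the surrounding variable names (rope_type, n_rot, freq_base, beta_fast, beta_slow, and the hparams fields) are assumptions for illustration, not the exact code in the commit.

    // Hypothetical sketch, not the actual llama.cpp source.
    const bool is_swa = hparams.is_swa(il);

    // Rope/YaRN parameters from the model hyperparameters (assumed field names).
    float freq_scale  = hparams.rope_freq_scale_train;
    float attn_factor = hparams.rope_attn_factor;
    float ext_factor  = hparams.yarn_ext_factor;

    if (is_swa) {
        // Sliding-window layers use regular RoPE with no YaRN scaling:
        // neutral frequency/attention scaling, and ext_factor = 0 so the
        // YaRN ramp computation is skipped entirely.
        freq_scale  = 1.0f;
        attn_factor = 1.0f;
        ext_factor  = 0.0f;
    }

    Qcur = ggml_rope_ext(
            ctx0, Qcur, inp_pos, nullptr,
            n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
            ext_factor, attn_factor, beta_fast, beta_slow);

Passing freq_scale = 1 and attn_factor = 1 with ext_factor = 0 makes ggml_rope_ext behave as plain rotary position embedding, which is what the updated comment describes for Olmo3's sliding-window layers.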