Skip to content

Commit bed565d

Browse files
committed
docs for kimi-vl
1 parent e47bd3d commit bed565d

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

docs/models.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,11 @@ Please use `--format completion` for these models.
305305
* Kimi (`KimiVLForConditionalGeneration`)
306306
* [x] VL: [A3B-Instruct](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/tree/7a3c132a7b0f1f1677f5a72f258bd3afded7d357), [A3B-Thinking](https://huggingface.co/moonshotai/Kimi-VL-A3B-Thinking/commit/16681d8ac24e505088698e4e34ea494dd6e24400), [A3B-Thinking-2506](https://huggingface.co/moonshotai/Kimi-VL-A3B-Thinking-2506/tree/f124f44fb6ab5778cfac5117e3902ef03e860ad4)
307307

308+
Additional options (use `--set X Y` to change values):
309+
* `video_max_frames`: default: `20`.
310+
* `native_resolution`: whether to use the native resolution of the input; when `false`, images are resized to 896×896. Default: `false` (native resolution seems sensitive to quantization, so it is disabled by default).
311+
* `fps`: default: `1.0`.
312+
308313
* Qwen (`Qwen2AudioForConditionalGeneration`)
309314
* [x] Qwen2-Audio: [7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct/tree/0a095220c30b7b31434169c3086508ef3ea5bf0a)
310315

models/kimi.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -587,9 +587,9 @@ namespace vl
587587
int media_end_token_id;
588588
int media_pad_token_id;
589589

590-
int video_max_frames = 20;
591-
bool arbitrary_resolution = false;
592-
double fps = 1.0;
590+
int video_max_frames = 20;
591+
bool native_resolution = false;
592+
double fps = 1.0;
593593
};
594594

595595
void ChatHistoryEncoder::append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const
@@ -686,7 +686,7 @@ namespace vl
686686
{
687687
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
688688
tok->video_max_frames = utils::get_opt(args, "video_max_frames", tok->video_max_frames);
689-
tok->arbitrary_resolution = utils::get_opt(args, "arbitrary_resolution", false);
689+
tok->native_resolution = utils::get_opt(args, "native_resolution", tok->native_resolution);
690690
tok->fps = utils::get_opt(args, "fps", tok->fps);
691691
}
692692

@@ -714,7 +714,7 @@ namespace vl
714714
std::unique_ptr<vision::Resize> resize;
715715
std::unique_ptr<vision::PreMaxImageSize> max_size;
716716

717-
if (!tok->arbitrary_resolution)
717+
if (!tok->native_resolution)
718718
resize.reset(new vision::Resize(896, 896));
719719

720720
// expand video into images

0 commit comments

Comments
 (0)