Skip to content

Commit 1cd5eeb

Browse files
committed
kimi-vl: default to use resizing.
1 parent 0cd05ad commit 1cd5eeb

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

models/kimi.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,7 @@ namespace vl
588588
int media_pad_token_id;
589589

590590
int video_max_frames = 20;
591+
bool arbitrary_resolution = false;
591592
};
592593

593594
void ChatHistoryEncoder::append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const
@@ -683,7 +684,8 @@ namespace vl
683684
void set_additional_args(const std::map<std::string, std::string> &args) override
684685
{
685686
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
686-
tok->video_max_frames = utils::get_opt(args, "video_max_frames", tok->video_max_frames);
687+
tok->video_max_frames = utils::get_opt(args, "video_max_frames", tok->video_max_frames);
688+
tok->arbitrary_resolution = utils::get_opt(args, "arbitrary_resolution", false);
687689
}
688690

689691
void before_generate(const GenerationConfig &gen_config) override
@@ -750,6 +752,10 @@ namespace vl
750752
vision::MaxGridHeight param3(512);
751753
vision::MaxGridWidth param4(512);
752754

755+
std::unique_ptr<vision::Resize> resize;
756+
if (!tok->arbitrary_resolution)
757+
resize.reset(new vision::Resize(896, 896));
758+
753759
vision::image_load(piece.content.c_str(), pixels, w, h, patch_size, vision::PaddingMode::Black);
754760

755761
std::vector<float> scaled;

0 commit comments

Comments
 (0)