@@ -138,8 +138,18 @@ def parse_args():
138138 help = "Number of files per subdirectory." ,
139139 )
140140
141+ # VLM-related args: pixel bounds forwarded to AutoProcessor when args.is_vlm is set
142+ vlm_group = parser .add_argument_group ("vlm" )
143+ vlm_group .add_argument (
144+ "--min-pixels" , type = int , default = 50176
145+ ) # 64*28*28 for qwen2.5-vl
146+ vlm_group .add_argument (
147+ "--max-pixels" , type = int , default = 802816
148+ ) # 1024*28*28 for qwen2.5-vl
149+
141150 sglang_group = parser .add_argument_group ("sglang" )
142151 SGLangBackendArgs .add_args (sglang_group )
152+
143153 return parser .parse_args ()
144154
145155
@@ -187,7 +197,10 @@ def build_target_model(
187197 target_model .set_aux_hidden_states_layers (args .aux_hidden_states_layers )
188198
189199 if args .is_vlm :
190- processor = AutoProcessor .from_pretrained (args .target_model_path )
200+ processor = AutoProcessor .from_pretrained (args .target_model_path ,
201+ min_pixels = args .min_pixels ,
202+ max_pixels = args .max_pixels ,
203+ )
191204 else :
192205 processor = None
193206
@@ -583,6 +596,8 @@ def main():
583596 args .target_model_path , trust_remote_code = True
584597 )
585598 cache_params_string = f"{ args .data_path } -{ args .max_length } -{ args .chat_template } -{ args .target_model_path } -{ args .num_samples } -{ args .is_preformatted } "
599+ if args .is_vlm :
600+ cache_params_string = f'{ cache_params_string } -{ args .min_pixels } -{ args .max_pixels } '
586601 cache_key = hashlib .md5 (cache_params_string .encode ()).hexdigest ()
587602
588603 # Preprocess on complete, un-sharded dataset
0 commit comments