diff --git a/internvl_chat/internvl/model/internvl_chat/configuration_internvl_chat.py b/internvl_chat/internvl/model/internvl_chat/configuration_internvl_chat.py index 80abf7cba..f70b27c2f 100644 --- a/internvl_chat/internvl/model/internvl_chat/configuration_internvl_chat.py +++ b/internvl_chat/internvl/model/internvl_chat/configuration_internvl_chat.py @@ -34,7 +34,7 @@ def __init__( template=None, dynamic_image_size=False, use_thumbnail=False, - ps_version='v1', + pixel_unshuffle_version='v1', min_dynamic_patch=1, max_dynamic_patch=6, **kwargs): @@ -69,7 +69,7 @@ def __init__( self.template = template self.dynamic_image_size = dynamic_image_size self.use_thumbnail = use_thumbnail - self.ps_version = ps_version # pixel shuffle version + self.pixel_unshuffle_version = pixel_unshuffle_version # pixel unshuffle version self.min_dynamic_patch = min_dynamic_patch self.max_dynamic_patch = max_dynamic_patch @@ -79,7 +79,7 @@ def __init__( self.llm_config.tie_word_embeddings = self.tie_word_embeddings logger.info(f'vision_select_layer: {self.select_layer}') - logger.info(f'ps_version: {self.ps_version}') + logger.info(f'pixel_unshuffle_version: {self.pixel_unshuffle_version}') logger.info(f'min_dynamic_patch: {self.min_dynamic_patch}') logger.info(f'max_dynamic_patch: {self.max_dynamic_patch}') @@ -102,7 +102,7 @@ def to_dict(self): output['template'] = self.template output['dynamic_image_size'] = self.dynamic_image_size output['use_thumbnail'] = self.use_thumbnail - output['ps_version'] = self.ps_version + output['pixel_unshuffle_version'] = self.pixel_unshuffle_version output['min_dynamic_patch'] = self.min_dynamic_patch output['max_dynamic_patch'] = self.max_dynamic_patch diff --git a/internvl_chat/internvl/model/internvl_chat/modeling_internvl_chat.py b/internvl_chat/internvl/model/internvl_chat/modeling_internvl_chat.py index 53f97396c..2d5b19f3d 100644 --- a/internvl_chat/internvl/model/internvl_chat/modeling_internvl_chat.py +++ b/internvl_chat/internvl/model/internvl_chat/modeling_internvl_chat.py @@ -56,7 +56,7 @@ def __init__(self, config: InternVLChatConfig, vision_model=None, language_model self.template = config.template self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2)) self.downsample_ratio = config.downsample_ratio - self.ps_version = config.ps_version + self.pixel_unshuffle_version = config.pixel_unshuffle_version self.llm_arch_name = config.llm_config.architectures[0] # Enable Flash Attention if supported, otherwise fall back to eager attention. use_flash_attn = use_flash_attn if has_flash_attn else False @@ -64,7 +64,7 @@ def __init__(self, config: InternVLChatConfig, vision_model=None, language_model config.llm_config.attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager' logger.info(f'num_image_token: {self.num_image_token}') - logger.info(f'ps_version: {self.ps_version}') + logger.info(f'pixel_unshuffle_version: {self.pixel_unshuffle_version}') if vision_model is not None: self.vision_model = vision_model else: @@ -254,7 +254,7 @@ def forward( attentions=outputs.attentions, ) - def pixel_shuffle(self, x, scale_factor=0.5): + def pixel_unshuffle(self, x, scale_factor=0.5): n, w, h, c = x.size() # N, W, H, C --> N, W, H * scale, C // scale x = x.view(n, w, int(h * scale_factor), int(c / scale_factor)) @@ -263,8 +263,8 @@ def pixel_shuffle(self, x, scale_factor=0.5): # N, H * scale, W, C // scale --> N, H * scale, W * scale, C // (scale ** 2) x = x.view(n, int(h * scale_factor), int(w * scale_factor), int(c / (scale_factor * scale_factor))) - if self.ps_version == 'v1': - warnings.warn("In ps_version 'v1', the height and width have not been swapped back, " + if self.pixel_unshuffle_version == 'v1': + warnings.warn("In pixel_unshuffle_version 'v1', the height and width have not been swapped back, " 'which results in a transposed image.') else: x = x.permute(0, 2, 1, 3).contiguous() @@ -285,7 +285,7 @@ def extract_feature(self, pixel_values): h = w = int(vit_embeds.shape[1] ** 0.5) vit_embeds = vit_embeds.reshape(vit_embeds.shape[0], h, w, -1) - vit_embeds = self.pixel_shuffle(vit_embeds, scale_factor=self.downsample_ratio) + vit_embeds = self.pixel_unshuffle(vit_embeds, scale_factor=self.downsample_ratio) vit_embeds = vit_embeds.reshape(vit_embeds.shape[0], -1, vit_embeds.shape[-1]) vit_embeds = self.mlp1(vit_embeds) return vit_embeds diff --git a/internvl_chat/internvl/train/internvl_chat_finetune.py b/internvl_chat/internvl/train/internvl_chat_finetune.py index 42f669437..4188164b5 100644 --- a/internvl_chat/internvl/train/internvl_chat_finetune.py +++ b/internvl_chat/internvl/train/internvl_chat_finetune.py @@ -145,10 +145,15 @@ class ModelArguments: default=0.0, metadata={'help': 'Set the drop path rate for the ViT. Default is 0.'}, ) - ps_version: Literal['v1', 'v2'] = field( + pixel_unshuffle_version: Literal['v1', 'v2'] = field( default='v2', metadata={'help': 'Specify the version of pixel shuffle implementation. Default is v2.'} ) + # Deprecated alias for pixel_unshuffle_version; keep temporarily for backward compatibility + ps_version: Optional[Literal['v1', 'v2']] = field( + default=None, + metadata={'help': 'DEPRECATED: use --pixel_unshuffle_version instead.'} + ) use_fast_tokenizer: bool = field( default=False, metadata={'help': 'Set to True to use the fast mode of the tokenizer.'} @@ -827,6 +832,34 @@ def main(): handlers=[logging.StreamHandler(sys.stdout)], ) + # -------------------- begin: migrate deprecated args (ps_version -> pixel_unshuffle_version) -------------------- + + # !! IF `ps_version` IS READY TO BE DEPRECATED, REMOVE THE FOLLOWING BLOCK COMPLETELY !! + + # Getting the "default value" of the new argument to determine if the user has explicitly set it + _default_new = ModelArguments.__dataclass_fields__['pixel_unshuffle_version'].default + new_is_default = (model_args.pixel_unshuffle_version == _default_new) + + # If the old argument is provided, issue a deprecation warning and map it + if getattr(model_args, 'ps_version', None) is not None: + warnings.warn( + '`--ps_version` is deprecated and will be removed; use `--pixel_unshuffle_version` instead.', + DeprecationWarning + ) + # If the new argument appears to have been explicitly set, and conflicts with the old value → take the new argument + if not new_is_default and model_args.ps_version != model_args.pixel_unshuffle_version: + logger.warning( + f'Both ps_version={model_args.ps_version} (deprecated) and ' + f'pixel_unshuffle_version={model_args.pixel_unshuffle_version} are provided; ' + f'using pixel_unshuffle_version.' + ) + else: + # Else, map the old value to the new argument + model_args.pixel_unshuffle_version = model_args.ps_version + + logger.info(f'Pixel unshuffle version = {model_args.pixel_unshuffle_version}') + # -------------------- end: migrate deprecated args ------------------------------------------------------------ + if training_args.should_log: # The default of training_args.log_level is passive, so we set log level at info here to have that default. transformers.utils.logging.set_verbosity_info() @@ -903,7 +936,7 @@ def main(): config.select_layer = model_args.vision_select_layer config.dynamic_image_size = data_args.dynamic_image_size config.use_thumbnail = data_args.use_thumbnail - config.ps_version = model_args.ps_version + config.pixel_unshuffle_version = model_args.pixel_unshuffle_version config.min_dynamic_patch = data_args.min_dynamic_patch config.max_dynamic_patch = data_args.max_dynamic_patch model = InternVLChatModel.from_pretrained( @@ -932,7 +965,7 @@ def main(): vision_config.to_dict(), llm_config.to_dict(), downsample_ratio=data_args.down_sample_ratio, pad2square=data_args.pad2square, template=data_args.conv_style, select_layer=model_args.vision_select_layer, dynamic_image_size=data_args.dynamic_image_size, - use_thumbnail=data_args.use_thumbnail, ps_version=model_args.ps_version, + use_thumbnail=data_args.use_thumbnail, pixel_unshuffle_version=model_args.pixel_unshuffle_version, min_dynamic_patch=data_args.min_dynamic_patch, max_dynamic_patch=data_args.max_dynamic_patch) internvl_chat_config.force_image_size = data_args.force_image_size logger.info('Building InternVLChatModel...') diff --git a/internvl_chat/internvl/train/internvl_chat_mpo.py b/internvl_chat/internvl/train/internvl_chat_mpo.py index 802f94588..6841396aa 100644 --- a/internvl_chat/internvl/train/internvl_chat_mpo.py +++ b/internvl_chat/internvl/train/internvl_chat_mpo.py @@ -146,10 +146,15 @@ class ModelArguments: default=0.0, metadata={'help': 'Set the drop path rate for the ViT. Default is 0.'}, ) - ps_version: Literal['v1', 'v2'] = field( + pixel_unshuffle_version: Literal['v1', 'v2'] = field( default='v2', metadata={'help': 'Specify the version of pixel shuffle implementation. Default is v2.'} ) + # Deprecated alias for pixel_unshuffle_version; keep temporarily for backward compatibility + ps_version: Optional[Literal['v1', 'v2']] = field( + default=None, + metadata={'help': 'DEPRECATED: use --pixel_unshuffle_version instead.'} + ) use_fast_tokenizer: bool = field( default=False, metadata={'help': 'Set to True to use the fast mode of the tokenizer.'} @@ -803,6 +808,35 @@ def main(): handlers=[logging.StreamHandler(sys.stdout)], ) + # -------------------- begin: migrate deprecated args (ps_version -> pixel_unshuffle_version) -------------------- + + # !! IF `ps_version` IS READY TO BE DEPRECATED, REMOVE THE FOLLOWING BLOCK COMPLETELY !! + + # Getting the "default value" of the new argument to determine if the user has explicitly set it + _default_new = ModelArguments.__dataclass_fields__['pixel_unshuffle_version'].default + new_is_default = (model_args.pixel_unshuffle_version == _default_new) + + # If the old argument is provided, issue a deprecation warning and map it + if getattr(model_args, 'ps_version', None) is not None: + warnings.warn( + '`--ps_version` is deprecated and will be removed; use `--pixel_unshuffle_version` instead.', + DeprecationWarning + ) + # If the new argument appears to have been explicitly set, and conflicts with the old value → take the new argument + if not new_is_default and model_args.ps_version != model_args.pixel_unshuffle_version: + logger.warning( + f'Both ps_version={model_args.ps_version} (deprecated) and ' + f'pixel_unshuffle_version={model_args.pixel_unshuffle_version} are provided; ' + f'using pixel_unshuffle_version.' + ) + else: + # Else, map the old value to the new argument + model_args.pixel_unshuffle_version = model_args.ps_version + + logger.info(f'Pixel unshuffle version = {model_args.pixel_unshuffle_version}') + # -------------------- end: migrate deprecated args ------------------------------------------------------------ + + if training_args.should_log: # The default of training_args.log_level is passive, so we set log level at info here to have that default. transformers.utils.logging.set_verbosity_info() @@ -873,7 +907,7 @@ def main(): config.select_layer = model_args.vision_select_layer config.dynamic_image_size = data_args.dynamic_image_size config.use_thumbnail = data_args.use_thumbnail - config.ps_version = model_args.ps_version + config.pixel_unshuffle_version = model_args.pixel_unshuffle_version config.min_dynamic_patch = data_args.min_dynamic_patch config.max_dynamic_patch = data_args.max_dynamic_patch model = InternVLChatModel.from_pretrained( @@ -904,7 +938,7 @@ def main(): vision_config.to_dict(), llm_config.to_dict(), downsample_ratio=data_args.down_sample_ratio, pad2square=data_args.pad2square, template=data_args.conv_style, select_layer=model_args.vision_select_layer, dynamic_image_size=data_args.dynamic_image_size, - use_thumbnail=data_args.use_thumbnail, ps_version=model_args.ps_version, + use_thumbnail=data_args.use_thumbnail, pixel_unshuffle_version=model_args.pixel_unshuffle_version, min_dynamic_patch=data_args.min_dynamic_patch, max_dynamic_patch=data_args.max_dynamic_patch) internvl_chat_config.force_image_size = data_args.force_image_size logger.info('Building InternVLChatModel...') diff --git a/internvl_chat/internvl/train/internvl_chat_pretrain.py b/internvl_chat/internvl/train/internvl_chat_pretrain.py index d7962ae92..e8c83bd90 100644 --- a/internvl_chat/internvl/train/internvl_chat_pretrain.py +++ b/internvl_chat/internvl/train/internvl_chat_pretrain.py @@ -145,10 +145,15 @@ class ModelArguments: default=0.0, metadata={'help': 'Set the drop path rate for the ViT. Default is 0.'}, ) - ps_version: Literal['v1', 'v2'] = field( + pixel_unshuffle_version: Literal['v1', 'v2'] = field( default='v2', metadata={'help': 'Specify the version of pixel shuffle implementation. Default is v2.'} ) + # Deprecated alias for pixel_unshuffle_version; keep temporarily for backward compatibility + ps_version: Optional[Literal['v1', 'v2']] = field( + default=None, + metadata={'help': 'DEPRECATED: use --pixel_unshuffle_version instead.'} + ) use_fast_tokenizer: bool = field( default=False, metadata={'help': 'Set to True to use the fast mode of the tokenizer.'} @@ -870,6 +875,36 @@ def main(): datefmt='%m/%d/%Y %H:%M:%S', handlers=[logging.StreamHandler(sys.stdout)], ) + + + # -------------------- begin: migrate deprecated args (ps_version -> pixel_unshuffle_version) -------------------- + + # !! IF `ps_version` IS READY TO BE DEPRECATED, REMOVE THE FOLLOWING BLOCK COMPLETELY !! + + # Getting the "default value" of the new argument to determine if the user has explicitly set it + _default_new = ModelArguments.__dataclass_fields__['pixel_unshuffle_version'].default + new_is_default = (model_args.pixel_unshuffle_version == _default_new) + + # If the old argument is provided, issue a deprecation warning and map it + if getattr(model_args, 'ps_version', None) is not None: + warnings.warn( + '`--ps_version` is deprecated and will be removed; use `--pixel_unshuffle_version` instead.', + DeprecationWarning + ) + # If the new argument appears to have been explicitly set, and conflicts with the old value → take the new argument + if not new_is_default and model_args.ps_version != model_args.pixel_unshuffle_version: + logger.warning( + f'Both ps_version={model_args.ps_version} (deprecated) and ' + f'pixel_unshuffle_version={model_args.pixel_unshuffle_version} are provided; ' + f'using pixel_unshuffle_version.' + ) + else: + # Else, map the old value to the new argument + model_args.pixel_unshuffle_version = model_args.ps_version + + logger.info(f'Pixel unshuffle version = {model_args.pixel_unshuffle_version}') + # -------------------- end: migrate deprecated args ------------------------------------------------------------ + if training_args.should_log: # The default of training_args.log_level is passive, so we set log level at info here to have that default. @@ -947,7 +982,7 @@ def main(): config.select_layer = model_args.vision_select_layer config.dynamic_image_size = data_args.dynamic_image_size config.use_thumbnail = data_args.use_thumbnail - config.ps_version = model_args.ps_version + config.pixel_unshuffle_version = model_args.pixel_unshuffle_version config.min_dynamic_patch = data_args.min_dynamic_patch config.max_dynamic_patch = data_args.max_dynamic_patch model = InternVLChatModel.from_pretrained( @@ -976,7 +1011,7 @@ def main(): vision_config.to_dict(), llm_config.to_dict(), downsample_ratio=data_args.down_sample_ratio, pad2square=data_args.pad2square, template=data_args.conv_style, select_layer=model_args.vision_select_layer, dynamic_image_size=data_args.dynamic_image_size, - use_thumbnail=data_args.use_thumbnail, ps_version=model_args.ps_version, + use_thumbnail=data_args.use_thumbnail, pixel_unshuffle_version=model_args.pixel_unshuffle_version, min_dynamic_patch=data_args.min_dynamic_patch, max_dynamic_patch=data_args.max_dynamic_patch) internvl_chat_config.force_image_size = data_args.force_image_size logger.info('Building InternVLChatModel...') diff --git a/internvl_chat/shell/internvl1.2/2nd_finetune/internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl1.2/2nd_finetune/internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_full.sh index 20be1beec..44289d6d1 100644 --- a/internvl_chat/shell/internvl1.2/2nd_finetune/internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl1.2/2nd_finetune/internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_full.sh @@ -70,7 +70,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size False \ --use_thumbnail False \ - --ps_version 'v1' \ + --pixel_unshuffle_version 'v1' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.2/2nd_finetune/internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl1.2/2nd_finetune/internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_lora.sh index b7195d645..d04d37dfd 100644 --- a/internvl_chat/shell/internvl1.2/2nd_finetune/internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl1.2/2nd_finetune/internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size False \ --use_thumbnail False \ - --ps_version 'v1' \ + --pixel_unshuffle_version 'v1' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh index 61ea3c646..e23460e46 100644 --- a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh @@ -62,7 +62,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh index e929bd265..85bf4c950 100644 --- a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_full.sh index 02439183e..507b05011 100644 --- a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_full.sh @@ -62,7 +62,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_lora.sh index 52a479862..307efe015 100644 --- a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_full.sh index 7eb1517e3..4e987daf5 100644 --- a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_full.sh @@ -62,7 +62,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh index 1e541bd3b..525151da5 100644 --- a/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl1.5/2nd_finetune/internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/hermes2_yi34b/internvl_chat_v1_5_hermes2_yi34b_dynamic_res_finetune.sh b/internvl_chat/shell/internvl1.5/hermes2_yi34b/internvl_chat_v1_5_hermes2_yi34b_dynamic_res_finetune.sh index 18026d86e..abb493306 100644 --- a/internvl_chat/shell/internvl1.5/hermes2_yi34b/internvl_chat_v1_5_hermes2_yi34b_dynamic_res_finetune.sh +++ b/internvl_chat/shell/internvl1.5/hermes2_yi34b/internvl_chat_v1_5_hermes2_yi34b_dynamic_res_finetune.sh @@ -70,7 +70,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/hermes2_yi34b/internvl_chat_v1_5_hermes2_yi34b_dynamic_res_pretrain.sh b/internvl_chat/shell/internvl1.5/hermes2_yi34b/internvl_chat_v1_5_hermes2_yi34b_dynamic_res_pretrain.sh index 3992907c5..6e0f1f15b 100644 --- a/internvl_chat/shell/internvl1.5/hermes2_yi34b/internvl_chat_v1_5_hermes2_yi34b_dynamic_res_pretrain.sh +++ b/internvl_chat/shell/internvl1.5/hermes2_yi34b/internvl_chat_v1_5_hermes2_yi34b_dynamic_res_pretrain.sh @@ -72,7 +72,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/internlm2_1_8b/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_finetune.sh b/internvl_chat/shell/internvl1.5/internlm2_1_8b/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_finetune.sh index c0f3677e2..a685ed492 100644 --- a/internvl_chat/shell/internvl1.5/internlm2_1_8b/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_finetune.sh +++ b/internvl_chat/shell/internvl1.5/internlm2_1_8b/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_finetune.sh @@ -70,7 +70,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/internlm2_1_8b/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_pretrain.sh b/internvl_chat/shell/internvl1.5/internlm2_1_8b/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_pretrain.sh index aa9d3ee9f..b12f918a8 100644 --- a/internvl_chat/shell/internvl1.5/internlm2_1_8b/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_pretrain.sh +++ b/internvl_chat/shell/internvl1.5/internlm2_1_8b/internvl_chat_v1_5_internlm2_1_8b_dynamic_res_pretrain.sh @@ -71,7 +71,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/internlm2_20b/internvl_chat_v1_5_internlm2_20b_dynamic_res_finetune.sh b/internvl_chat/shell/internvl1.5/internlm2_20b/internvl_chat_v1_5_internlm2_20b_dynamic_res_finetune.sh index 11804be90..a782e6bb3 100644 --- a/internvl_chat/shell/internvl1.5/internlm2_20b/internvl_chat_v1_5_internlm2_20b_dynamic_res_finetune.sh +++ b/internvl_chat/shell/internvl1.5/internlm2_20b/internvl_chat_v1_5_internlm2_20b_dynamic_res_finetune.sh @@ -70,7 +70,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/internlm2_20b/internvl_chat_v1_5_internlm2_20b_dynamic_res_pretrain.sh b/internvl_chat/shell/internvl1.5/internlm2_20b/internvl_chat_v1_5_internlm2_20b_dynamic_res_pretrain.sh index 8edc053c7..26a63fc08 100644 --- a/internvl_chat/shell/internvl1.5/internlm2_20b/internvl_chat_v1_5_internlm2_20b_dynamic_res_pretrain.sh +++ b/internvl_chat/shell/internvl1.5/internlm2_20b/internvl_chat_v1_5_internlm2_20b_dynamic_res_pretrain.sh @@ -71,7 +71,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/phi3_3_8b/internvl_chat_v1_5_phi3_3_8b_dynamic_res_finetune.sh b/internvl_chat/shell/internvl1.5/phi3_3_8b/internvl_chat_v1_5_phi3_3_8b_dynamic_res_finetune.sh index b4c521edd..986660f57 100644 --- a/internvl_chat/shell/internvl1.5/phi3_3_8b/internvl_chat_v1_5_phi3_3_8b_dynamic_res_finetune.sh +++ b/internvl_chat/shell/internvl1.5/phi3_3_8b/internvl_chat_v1_5_phi3_3_8b_dynamic_res_finetune.sh @@ -70,7 +70,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl1.5/phi3_3_8b/internvl_chat_v1_5_phi3_3_8b_dynamic_res_pretrain.sh b/internvl_chat/shell/internvl1.5/phi3_3_8b/internvl_chat_v1_5_phi3_3_8b_dynamic_res_pretrain.sh index f5b912686..ab88b8691 100644 --- a/internvl_chat/shell/internvl1.5/phi3_3_8b/internvl_chat_v1_5_phi3_3_8b_dynamic_res_pretrain.sh +++ b/internvl_chat/shell/internvl1.5/phi3_3_8b/internvl_chat_v1_5_phi3_3_8b_dynamic_res_pretrain.sh @@ -71,7 +71,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_full.sh index b67be7201..a8c29cb38 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_full.sh @@ -62,7 +62,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_lora.sh index 38994e2c2..4c6743a1b 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_full.sh index 308e8ee90..e885e460b 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_full.sh @@ -62,7 +62,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_lora.sh index c37302e7a..b7f8b3366 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh index a95539f52..9fba3c317 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh @@ -62,7 +62,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh index e253361d2..737ed1aab 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora_coco.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora_coco.sh index f68d68dab..505be3256 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora_coco.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora_coco.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_full.sh index 28d924827..7d2ae504e 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_full.sh @@ -70,7 +70,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_lora.sh index cf1aa9d9c..8b573f385 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_full.sh index 0fe9614cb..492b5e7b2 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_full.sh @@ -62,7 +62,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh index d4242b214..c34cdeed5 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_full.sh index 45ce5b61c..7332ece83 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_full.sh @@ -70,7 +70,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_lora.sh index 9cd0175bd..25f47e338 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_full.sh index 6fb447267..4c0d91dd1 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_full.sh @@ -62,7 +62,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_lora.sh index 9fdb7559a..63fa96646 100644 --- a/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_lora.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.0_mpo/preference_optimization/internvl2_8b_internlm2_7b_dynamic_res_mpo_full.sh b/internvl_chat/shell/internvl2.0_mpo/preference_optimization/internvl2_8b_internlm2_7b_dynamic_res_mpo_full.sh index c99289495..e16cabc00 100644 --- a/internvl_chat/shell/internvl2.0_mpo/preference_optimization/internvl2_8b_internlm2_7b_dynamic_res_mpo_full.sh +++ b/internvl_chat/shell/internvl2.0_mpo/preference_optimization/internvl2_8b_internlm2_7b_dynamic_res_mpo_full.sh @@ -71,7 +71,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_1b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_1b_dynamic_res_2nd_finetune_full.sh index 94af8e6fe..09e22c9fe 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_1b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_1b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_1b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_1b_dynamic_res_2nd_finetune_lora.sh index 71c1b1e9a..73cd080b6 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_1b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_1b_dynamic_res_2nd_finetune_lora.sh @@ -64,7 +64,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_26b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_26b_dynamic_res_2nd_finetune_full.sh index b3199ec8b..dae55e16f 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_26b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_26b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_26b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_26b_dynamic_res_2nd_finetune_lora.sh index b8b812c45..427c31b74 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_26b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_26b_dynamic_res_2nd_finetune_lora.sh @@ -64,7 +64,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_full.sh index fca861bdf..3c663b7d5 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_lora.sh index 0830a4d36..d3868fd08 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_lora.sh @@ -64,7 +64,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_lora_coco.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_lora_coco.sh index 43cd4db64..2316409fe 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_lora_coco.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_2b_dynamic_res_2nd_finetune_lora_coco.sh @@ -64,7 +64,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_38b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_38b_dynamic_res_2nd_finetune_full.sh index a31bc4460..6800348c9 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_38b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_38b_dynamic_res_2nd_finetune_full.sh @@ -71,7 +71,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_38b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_38b_dynamic_res_2nd_finetune_lora.sh index eedd27303..cca08eef1 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_38b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_38b_dynamic_res_2nd_finetune_lora.sh @@ -64,7 +64,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_4b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_4b_dynamic_res_2nd_finetune_full.sh index f51b46fd5..f0a3f151e 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_4b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_4b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_4b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_4b_dynamic_res_2nd_finetune_lora.sh index 59fef84e0..13d68c4c2 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_4b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_4b_dynamic_res_2nd_finetune_lora.sh @@ -64,7 +64,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_78b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_78b_dynamic_res_2nd_finetune_full.sh index f7ab24fcd..bc62f4dfc 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_78b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_78b_dynamic_res_2nd_finetune_full.sh @@ -71,7 +71,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_78b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_78b_dynamic_res_2nd_finetune_lora.sh index 4578248f0..87e4d2803 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_78b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_78b_dynamic_res_2nd_finetune_lora.sh @@ -64,7 +64,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_8b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_8b_dynamic_res_2nd_finetune_full.sh index ab3231b54..315d53c21 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_8b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_8b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_8b_dynamic_res_2nd_finetune_lora.sh b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_8b_dynamic_res_2nd_finetune_lora.sh index c740d53a7..bcd7bda94 100644 --- a/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_8b_dynamic_res_2nd_finetune_lora.sh +++ b/internvl_chat/shell/internvl2.5/2nd_finetune/internvl2_5_8b_dynamic_res_2nd_finetune_lora.sh @@ -64,7 +64,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" diff --git a/internvl_chat/shell/internvl2.5/stage1.5/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1_5.sh b/internvl_chat/shell/internvl2.5/stage1.5/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1_5.sh index 5afbe82c8..6ec49f374 100644 --- a/internvl_chat/shell/internvl2.5/stage1.5/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1_5.sh +++ b/internvl_chat/shell/internvl2.5/stage1.5/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1_5.sh @@ -74,7 +74,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage1.5/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1_5.sh b/internvl_chat/shell/internvl2.5/stage1.5/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1_5.sh index 356cfc8ed..da6a79cb5 100644 --- a/internvl_chat/shell/internvl2.5/stage1.5/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1_5.sh +++ b/internvl_chat/shell/internvl2.5/stage1.5/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1_5.sh @@ -74,7 +74,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage1.sh b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage1.sh index 4c9e1cc03..f5e4985a7 100644 --- a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage1.sh +++ b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage1.sh @@ -75,7 +75,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1.sh b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1.sh index 7c79edc0f..216e09979 100644 --- a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1.sh +++ b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1.sh @@ -75,7 +75,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage1.sh b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage1.sh index b88d2c16b..10b64edb0 100644 --- a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage1.sh +++ b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage1.sh @@ -75,7 +75,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_38b_qwen2_5_32b_dynamic_res_stage1.sh b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_38b_qwen2_5_32b_dynamic_res_stage1.sh index c08110195..53f0dd9c5 100644 --- a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_38b_qwen2_5_32b_dynamic_res_stage1.sh +++ b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_38b_qwen2_5_32b_dynamic_res_stage1.sh @@ -75,7 +75,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_4b_qwen2_5_3b_dynamic_res_stage1.sh b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_4b_qwen2_5_3b_dynamic_res_stage1.sh index fae248e8a..cb0ed1cd6 100644 --- a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_4b_qwen2_5_3b_dynamic_res_stage1.sh +++ b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_4b_qwen2_5_3b_dynamic_res_stage1.sh @@ -75,7 +75,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_78b_qwen2_5_72b_dynamic_res_stage1.sh b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_78b_qwen2_5_72b_dynamic_res_stage1.sh index 08924d0df..d76d0ad51 100644 --- a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_78b_qwen2_5_72b_dynamic_res_stage1.sh +++ b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_78b_qwen2_5_72b_dynamic_res_stage1.sh @@ -75,7 +75,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_70b.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1.sh b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1.sh index 225f9dc98..0c589680f 100644 --- a/internvl_chat/shell/internvl2.5/stage1/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1.sh +++ b/internvl_chat/shell/internvl2.5/stage1/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1.sh @@ -75,7 +75,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage2.sh b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage2.sh index d348c3910..43f03c183 100644 --- a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage2.sh +++ b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage2.sh @@ -74,7 +74,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage2.sh b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage2.sh index bbfb43230..7e15261dd 100644 --- a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage2.sh +++ b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_26b_internlm2_5_20b_dynamic_res_stage2.sh @@ -74,7 +74,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage2.sh b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage2.sh index baa66aaf9..e8e6d210c 100644 --- a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage2.sh +++ b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage2.sh @@ -74,7 +74,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_38b_qwen2_5_32b_dynamic_res_stage2.sh b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_38b_qwen2_5_32b_dynamic_res_stage2.sh index 81f2c75f7..3a7e98d2b 100644 --- a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_38b_qwen2_5_32b_dynamic_res_stage2.sh +++ b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_38b_qwen2_5_32b_dynamic_res_stage2.sh @@ -76,7 +76,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_4b_qwen2_5_3b_dynamic_res_stage2.sh b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_4b_qwen2_5_3b_dynamic_res_stage2.sh index decc1171f..fbaf4c1d9 100644 --- a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_4b_qwen2_5_3b_dynamic_res_stage2.sh +++ b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_4b_qwen2_5_3b_dynamic_res_stage2.sh @@ -74,7 +74,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_78b_qwen2_5_72b_dynamic_res_stage2.sh b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_78b_qwen2_5_72b_dynamic_res_stage2.sh index f3d1badce..19ce36f99 100644 --- a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_78b_qwen2_5_72b_dynamic_res_stage2.sh +++ b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_78b_qwen2_5_72b_dynamic_res_stage2.sh @@ -76,7 +76,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b_1e8.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage2.sh b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage2.sh index 18544c8da..a6c7863e7 100644 --- a/internvl_chat/shell/internvl2.5/stage2/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage2.sh +++ b/internvl_chat/shell/internvl2.5/stage2/internvl2_5_8b_internlm2_5_7b_dynamic_res_stage2.sh @@ -74,7 +74,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_mpo.sh b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_mpo.sh index 0786bf894..842b6cb2c 100644 --- a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_mpo.sh +++ b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_1b_qwen2_5_0_5b_dynamic_res_mpo.sh @@ -66,7 +66,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_26b_internlm2_5_20b_dynamic_res_mpo.sh b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_26b_internlm2_5_20b_dynamic_res_mpo.sh index 5a00db77e..835458e6b 100644 --- a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_26b_internlm2_5_20b_dynamic_res_mpo.sh +++ b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_26b_internlm2_5_20b_dynamic_res_mpo.sh @@ -66,7 +66,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_70b.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_mpo.sh b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_mpo.sh index a325ccb7e..39fec2b56 100644 --- a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_mpo.sh +++ b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_2b_internlm2_5_1_8b_dynamic_res_mpo.sh @@ -66,7 +66,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_38b_qwen2_5_32b_dynamic_res_mpo.sh b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_38b_qwen2_5_32b_dynamic_res_mpo.sh index 0afd7018d..87c9f20dd 100644 --- a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_38b_qwen2_5_32b_dynamic_res_mpo.sh +++ b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_38b_qwen2_5_32b_dynamic_res_mpo.sh @@ -66,7 +66,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_4b_qwen2_5_3b_dynamic_res_mpo.sh b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_4b_qwen2_5_3b_dynamic_res_mpo.sh index 1a1bf87a7..5ec012d5d 100644 --- a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_4b_qwen2_5_3b_dynamic_res_mpo.sh +++ b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_4b_qwen2_5_3b_dynamic_res_mpo.sh @@ -66,7 +66,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_78b_qwen2_5_72b_dynamic_res_mpo.sh b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_78b_qwen2_5_72b_dynamic_res_mpo.sh index 889c591f1..7c24068da 100644 --- a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_78b_qwen2_5_72b_dynamic_res_mpo.sh +++ b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_78b_qwen2_5_72b_dynamic_res_mpo.sh @@ -66,7 +66,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b_1e8.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_8b_internlm2_5_7b_dynamic_res_mpo.sh b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_8b_internlm2_5_7b_dynamic_res_mpo.sh index e687f5b97..29d986c68 100644 --- a/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_8b_internlm2_5_7b_dynamic_res_mpo.sh +++ b/internvl_chat/shell/internvl2.5_mpo/preference_optimization/internvl2_5_8b_internlm2_5_7b_dynamic_res_mpo.sh @@ -66,7 +66,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_14b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_14b_dynamic_res_2nd_finetune_full.sh index 65447e99a..714fc186f 100644 --- a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_14b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_14b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" \ No newline at end of file diff --git a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_1b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_1b_dynamic_res_2nd_finetune_full.sh index f5038f6f7..4539f9162 100644 --- a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_1b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_1b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" \ No newline at end of file diff --git a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_2b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_2b_dynamic_res_2nd_finetune_full.sh index 59cd06018..37975711a 100644 --- a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_2b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_2b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" \ No newline at end of file diff --git a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_38b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_38b_dynamic_res_2nd_finetune_full.sh index 1d6f7703d..596448b4b 100644 --- a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_38b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_38b_dynamic_res_2nd_finetune_full.sh @@ -71,7 +71,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_34b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" \ No newline at end of file diff --git a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_78b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_78b_dynamic_res_2nd_finetune_full.sh index 9523d10a1..24f348506 100644 --- a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_78b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_78b_dynamic_res_2nd_finetune_full.sh @@ -71,7 +71,7 @@ srun -p ${PARTITION} \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" \ No newline at end of file diff --git a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_8b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_8b_dynamic_res_2nd_finetune_full.sh index 6cffde913..dba8763df 100644 --- a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_8b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_8b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" \ No newline at end of file diff --git a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_9b_dynamic_res_2nd_finetune_full.sh b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_9b_dynamic_res_2nd_finetune_full.sh index 5a426db42..363000d0e 100644 --- a/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_9b_dynamic_res_2nd_finetune_full.sh +++ b/internvl_chat/shell/internvl3.0/2nd_finetune/internvl3_9b_dynamic_res_2nd_finetune_full.sh @@ -63,7 +63,7 @@ torchrun \ --group_by_length True \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ 2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt" \ No newline at end of file diff --git a/internvl_chat/shell/internvl3.0/mpo/internvl3_14b_mpo.sh b/internvl_chat/shell/internvl3.0/mpo/internvl3_14b_mpo.sh index 01bcf8b76..436a6aeac 100644 --- a/internvl_chat/shell/internvl3.0/mpo/internvl3_14b_mpo.sh +++ b/internvl_chat/shell/internvl3.0/mpo/internvl3_14b_mpo.sh @@ -68,7 +68,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b_1e8.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl3.0/mpo/internvl3_1b_mpo.sh b/internvl_chat/shell/internvl3.0/mpo/internvl3_1b_mpo.sh index 05bb48149..1a88e69bd 100644 --- a/internvl_chat/shell/internvl3.0/mpo/internvl3_1b_mpo.sh +++ b/internvl_chat/shell/internvl3.0/mpo/internvl3_1b_mpo.sh @@ -68,7 +68,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl3.0/mpo/internvl3_2b_mpo.sh b/internvl_chat/shell/internvl3.0/mpo/internvl3_2b_mpo.sh index 83d60dd38..9b2aab51d 100644 --- a/internvl_chat/shell/internvl3.0/mpo/internvl3_2b_mpo.sh +++ b/internvl_chat/shell/internvl3.0/mpo/internvl3_2b_mpo.sh @@ -68,7 +68,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl3.0/mpo/internvl3_38b_mpo.sh b/internvl_chat/shell/internvl3.0/mpo/internvl3_38b_mpo.sh index f45294209..974381080 100644 --- a/internvl_chat/shell/internvl3.0/mpo/internvl3_38b_mpo.sh +++ b/internvl_chat/shell/internvl3.0/mpo/internvl3_38b_mpo.sh @@ -69,7 +69,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b_1e7_offload.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl3.0/mpo/internvl3_78b_mpo.sh b/internvl_chat/shell/internvl3.0/mpo/internvl3_78b_mpo.sh index 3caf196fe..015c27b1a 100644 --- a/internvl_chat/shell/internvl3.0/mpo/internvl3_78b_mpo.sh +++ b/internvl_chat/shell/internvl3.0/mpo/internvl3_78b_mpo.sh @@ -69,7 +69,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b_1e7_offload.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl3.0/mpo/internvl3_8b_mpo.sh b/internvl_chat/shell/internvl3.0/mpo/internvl3_8b_mpo.sh index fddfccfd2..4b8d120f0 100644 --- a/internvl_chat/shell/internvl3.0/mpo/internvl3_8b_mpo.sh +++ b/internvl_chat/shell/internvl3.0/mpo/internvl3_8b_mpo.sh @@ -68,7 +68,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b_1e8.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/internvl3.0/mpo/internvl3_9b_mpo.sh b/internvl_chat/shell/internvl3.0/mpo/internvl3_9b_mpo.sh index 56ac83374..0c26c4d28 100644 --- a/internvl_chat/shell/internvl3.0/mpo/internvl3_9b_mpo.sh +++ b/internvl_chat/shell/internvl3.0/mpo/internvl3_9b_mpo.sh @@ -68,7 +68,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage3_config_100b_1e8.json" \ --report_to "tensorboard" \ --loss_type sigmoid,bco_pair \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_bdd.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_bdd.sh index 721642b6a..bdc0f58fb 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_bdd.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_bdd.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_drivelm.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_drivelm.sh index 46f2500c0..6cc0f09ff 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_drivelm.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_drivelm.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_medical.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_medical.sh index 02b26fdf1..c604219a8 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_medical.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_medical.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_remote.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_remote.sh index 37a791959..028a52c58 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_remote.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_1b_qwen2_0_5b_dynamic_res_finetune_remote.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_bdd.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_bdd.sh index 8fbe75155..79730ce4e 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_bdd.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_bdd.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_drivelm.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_drivelm.sh index 9fde37115..807dff6e2 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_drivelm.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_drivelm.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_medical.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_medical.sh index 196138714..758c562cc 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_medical.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_medical.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_remote.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_remote.sh index 341979f68..8595b190f 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_remote.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_2b_internlm2_1_8b_dynamic_res_finetune_remote.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_bdd.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_bdd.sh index 38df53414..3078c09bf 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_bdd.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_bdd.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_drivelm.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_drivelm.sh index d7528e493..7a826ca59 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_drivelm.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_drivelm.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_medical.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_medical.sh index 1963db455..569d236bb 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_medical.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_medical.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_remote.sh b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_remote.sh index e9fe3bb04..bd6243ffb 100644 --- a/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_remote.sh +++ b/internvl_chat/shell/mini_internvl/domain_adaptation/internvl2_4b_phi3_3_8b_dynamic_res_finetune_remote.sh @@ -65,7 +65,7 @@ srun -p ${PARTITION} \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --deepspeed "zero_stage1_config.json" \ --report_to "tensorboard" \ --use_packed_ds True \ diff --git a/internvl_chat_gpt_oss/internvl/model/internvl_chat/configuration_internvl_chat.py b/internvl_chat_gpt_oss/internvl/model/internvl_chat/configuration_internvl_chat.py index 4ff9ddebd..2dca7e9c5 100644 --- a/internvl_chat_gpt_oss/internvl/model/internvl_chat/configuration_internvl_chat.py +++ b/internvl_chat_gpt_oss/internvl/model/internvl_chat/configuration_internvl_chat.py @@ -31,7 +31,7 @@ def __init__( template=None, dynamic_image_size=False, use_thumbnail=False, - ps_version="v1", + pixel_unshuffle_version="v1", min_dynamic_patch=1, max_dynamic_patch=6, **kwargs, @@ -82,13 +82,13 @@ def __init__( self.template = template self.dynamic_image_size = dynamic_image_size self.use_thumbnail = use_thumbnail - self.ps_version = ps_version # pixel shuffle version + self.pixel_unshuffle_version = pixel_unshuffle_version # pixel unshuffle version self.min_dynamic_patch = min_dynamic_patch self.max_dynamic_patch = max_dynamic_patch self.tie_word_embeddings = self.llm_config.tie_word_embeddings logger.info(f'vision_select_layer: {self.select_layer}') - logger.info(f'ps_version: {self.ps_version}') + logger.info(f'pixel_unshuffle_version: {self.pixel_unshuffle_version}') logger.info(f'min_dynamic_patch: {self.min_dynamic_patch}') logger.info(f'max_dynamic_patch: {self.max_dynamic_patch}') @@ -111,7 +111,7 @@ def to_dict(self): output['template'] = self.template output['dynamic_image_size'] = self.dynamic_image_size output['use_thumbnail'] = self.use_thumbnail - output['ps_version'] = self.ps_version + output['pixel_unshuffle_version'] = self.pixel_unshuffle_version output['min_dynamic_patch'] = self.min_dynamic_patch output['max_dynamic_patch'] = self.max_dynamic_patch diff --git a/internvl_chat_gpt_oss/internvl/model/internvl_chat/modeling_internvl_chat.py b/internvl_chat_gpt_oss/internvl/model/internvl_chat/modeling_internvl_chat.py index d2a78f998..156979171 100644 --- a/internvl_chat_gpt_oss/internvl/model/internvl_chat/modeling_internvl_chat.py +++ b/internvl_chat_gpt_oss/internvl/model/internvl_chat/modeling_internvl_chat.py @@ -64,13 +64,13 @@ def __init__(self, config: InternVLChatConfig, vision_model=None, language_model self.template = config.template self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2)) self.downsample_ratio = config.downsample_ratio - self.ps_version = config.ps_version + self.pixel_unshuffle_version = config.pixel_unshuffle_version use_flash_attn = use_flash_attn if has_flash_attn else False config.vision_config.use_flash_attn = True if use_flash_attn else False # config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager' logger.info(f'num_image_token: {self.num_image_token}') - logger.info(f'ps_version: {self.ps_version}') + logger.info(f'pixel_unshuffle_version: {self.pixel_unshuffle_version}') if vision_model is not None: self.vision_model = vision_model else: @@ -275,7 +275,7 @@ def forward( attentions=outputs.attentions, ) - def pixel_shuffle(self, x, scale_factor=0.5): + def pixel_unshuffle(self, x, scale_factor=0.5): n, w, h, c = x.size() # N, W, H, C --> N, W, H * scale, C // scale x = x.view(n, w, int(h * scale_factor), int(c / scale_factor)) @@ -284,8 +284,8 @@ def pixel_shuffle(self, x, scale_factor=0.5): # N, H * scale, W, C // scale --> N, H * scale, W * scale, C // (scale ** 2) x = x.view(n, int(h * scale_factor), int(w * scale_factor), int(c / (scale_factor * scale_factor))) - if self.ps_version == 'v1': - warnings.warn("In ps_version 'v1', the height and width have not been swapped back, " + if self.pixel_unshuffle_version == 'v1': + warnings.warn("In pixel_unshuffle_version 'v1', the height and width have not been swapped back, " 'which results in a transposed image.') else: x = x.permute(0, 2, 1, 3).contiguous() @@ -306,7 +306,7 @@ def extract_feature(self, pixel_values): h = w = int(vit_embeds.shape[1] ** 0.5) vit_embeds = vit_embeds.reshape(vit_embeds.shape[0], h, w, -1) - vit_embeds = self.pixel_shuffle(vit_embeds, scale_factor=self.downsample_ratio) + vit_embeds = self.pixel_unshuffle(vit_embeds, scale_factor=self.downsample_ratio) vit_embeds = vit_embeds.reshape(vit_embeds.shape[0], -1, vit_embeds.shape[-1]) vit_embeds = self.mlp1(vit_embeds) return vit_embeds diff --git a/internvl_chat_gpt_oss/internvl/train/internvl_chat_finetune.py b/internvl_chat_gpt_oss/internvl/train/internvl_chat_finetune.py index 01145fdc6..9dc208294 100644 --- a/internvl_chat_gpt_oss/internvl/train/internvl_chat_finetune.py +++ b/internvl_chat_gpt_oss/internvl/train/internvl_chat_finetune.py @@ -133,10 +133,15 @@ class ModelArguments: default=0.0, metadata={'help': 'Set the drop path rate for the ViT. Default is 0.'}, ) - ps_version: Literal['v1', 'v2'] = field( + pixel_unshuffle_version: Literal['v1', 'v2'] = field( default='v2', metadata={'help': 'Specify the version of pixel shuffle implementation. Default is v2.'} ) + # Deprecated alias for pixel_unshuffle_version; keep temporarily for backward compatibility + ps_version: Optional[Literal['v1', 'v2']] = field( + default=None, + metadata={'help': 'DEPRECATED: use --pixel_unshuffle_version instead.'} + ) use_fast_tokenizer: bool = field( default=False, metadata={'help': 'Set to True to use the fast mode of the tokenizer.'} @@ -887,6 +892,36 @@ def main(): handlers=[logging.StreamHandler(sys.stdout)], ) + + # -------------------- begin: migrate deprecated args (ps_version -> pixel_unshuffle_version) -------------------- + + # !! IF `ps_version` IS READY TO BE DEPRECATED, REMOVE THE FOLLOWING BLOCK COMPLETELY !! + + # Getting the "default value" of the new argument to determine if the user has explicitly set it + _default_new = ModelArguments.__dataclass_fields__['pixel_unshuffle_version'].default + new_is_default = (model_args.pixel_unshuffle_version == _default_new) + + # If the old argument is provided, issue a deprecation warning and map it + if getattr(model_args, 'ps_version', None) is not None: + warnings.warn( + '`--ps_version` is deprecated and will be removed; use `--pixel_unshuffle_version` instead.', + DeprecationWarning + ) + # If the new argument appears to have been explicitly set, and conflicts with the old value → take the new argument + if not new_is_default and model_args.ps_version != model_args.pixel_unshuffle_version: + logger.warning( + f'Both ps_version={model_args.ps_version} (deprecated) and ' + f'pixel_unshuffle_version={model_args.pixel_unshuffle_version} are provided; ' + f'using pixel_unshuffle_version.' + ) + else: + # Else, map the old value to the new argument + model_args.pixel_unshuffle_version = model_args.ps_version + + logger.info(f'Pixel unshuffle version = {model_args.pixel_unshuffle_version}') + # -------------------- end: migrate deprecated args ------------------------------------------------------------ + + if training_args.should_log: # The default of training_args.log_level is passive, so we set log level at info here to have that default. transformers.utils.logging.set_verbosity_info() @@ -961,7 +996,7 @@ def main(): config.select_layer = model_args.vision_select_layer config.dynamic_image_size = data_args.dynamic_image_size config.use_thumbnail = data_args.use_thumbnail - config.ps_version = model_args.ps_version + config.pixel_unshuffle_version = model_args.pixel_unshuffle_version config.min_dynamic_patch = data_args.min_dynamic_patch config.max_dynamic_patch = data_args.max_dynamic_patch model = InternVLChatModel.from_pretrained(model_args.model_name_or_path, torch_dtype=torch.bfloat16, config=config) @@ -996,7 +1031,7 @@ def main(): select_layer=model_args.vision_select_layer, dynamic_image_size=data_args.dynamic_image_size, use_thumbnail=data_args.use_thumbnail, - ps_version=model_args.ps_version, + pixel_unshuffle_version=model_args.pixel_unshuffle_version, min_dynamic_patch=data_args.min_dynamic_patch, max_dynamic_patch=data_args.max_dynamic_patch, ) diff --git a/internvl_chat_gpt_oss/internvl/train/internvl_chat_mpo.py b/internvl_chat_gpt_oss/internvl/train/internvl_chat_mpo.py index 7ac62a7ce..4f910a765 100644 --- a/internvl_chat_gpt_oss/internvl/train/internvl_chat_mpo.py +++ b/internvl_chat_gpt_oss/internvl/train/internvl_chat_mpo.py @@ -140,10 +140,15 @@ class ModelArguments: default=0.0, metadata={'help': 'Set the drop path rate for the ViT. Default is 0.'}, ) - ps_version: Literal['v1', 'v2'] = field( + pixel_unshuffle_version: Literal['v1', 'v2'] = field( default='v2', metadata={'help': 'Specify the version of pixel shuffle implementation. Default is v2.'} ) + # Deprecated alias for pixel_unshuffle_version; keep temporarily for backward compatibility + ps_version: Optional[Literal['v1', 'v2']] = field( + default=None, + metadata={'help': 'DEPRECATED: use --pixel_unshuffle_version instead.'} + ) use_fast_tokenizer: bool = field( default=False, metadata={'help': 'Set to True to use the fast mode of the tokenizer.'} @@ -920,6 +925,36 @@ def main(): handlers=[logging.StreamHandler(sys.stdout)], ) + + # -------------------- begin: migrate deprecated args (ps_version -> pixel_unshuffle_version) -------------------- + + # !! IF `ps_version` IS READY TO BE DEPRECATED, REMOVE THE FOLLOWING BLOCK COMPLETELY !! + + # Getting the "default value" of the new argument to determine if the user has explicitly set it + _default_new = ModelArguments.__dataclass_fields__['pixel_unshuffle_version'].default + new_is_default = (model_args.pixel_unshuffle_version == _default_new) + + # If the old argument is provided, issue a deprecation warning and map it + if getattr(model_args, 'ps_version', None) is not None: + warnings.warn( + '`--ps_version` is deprecated and will be removed; use `--pixel_unshuffle_version` instead.', + DeprecationWarning + ) + # If the new argument appears to have been explicitly set, and conflicts with the old value → take the new argument + if not new_is_default and model_args.ps_version != model_args.pixel_unshuffle_version: + logger.warning( + f'Both ps_version={model_args.ps_version} (deprecated) and ' + f'pixel_unshuffle_version={model_args.pixel_unshuffle_version} are provided; ' + f'using pixel_unshuffle_version.' + ) + else: + # Else, map the old value to the new argument + model_args.pixel_unshuffle_version = model_args.ps_version + + logger.info(f'Pixel unshuffle version = {model_args.pixel_unshuffle_version}') + # -------------------- end: migrate deprecated args ------------------------------------------------------------ + + if training_args.should_log: # The default of training_args.log_level is passive, so we set log level at info here to have that default. transformers.utils.logging.set_verbosity_info() @@ -997,7 +1032,7 @@ def main(): config.select_layer = model_args.vision_select_layer config.dynamic_image_size = data_args.dynamic_image_size config.use_thumbnail = data_args.use_thumbnail - config.ps_version = model_args.ps_version + config.pixel_unshuffle_version = model_args.pixel_unshuffle_version config.min_dynamic_patch = data_args.min_dynamic_patch config.max_dynamic_patch = data_args.max_dynamic_patch model = InternVLChatModel.from_pretrained(model_args.model_name_or_path, torch_dtype=torch.bfloat16, config=config) diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage0_mlp_warmup.sh b/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage0_mlp_warmup.sh index 55305e95b..c486631a9 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage0_mlp_warmup.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage0_mlp_warmup.sh @@ -80,7 +80,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn True \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage1_pretrain.sh b/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage1_pretrain.sh index 2591365b3..468c38576 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage1_pretrain.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage1_pretrain.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn True \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage2_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage2_sft.sh index 7841d2cb8..8183558b2 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage2_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage2_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn True \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage3_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage3_mpo.sh index 09518a57a..ae3f58414 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage3_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_gpt_oss/internvl3_5_gpt_oss_20b_stage3_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn True \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_14b_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_14b_mpo.sh index 1986f0333..06b41d918 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_14b_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_14b_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_14b_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_14b_sft.sh index b8c774918..d7a9aa8b0 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_14b_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_14b_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_1b_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_1b_mpo.sh index 76e7c0667..2308bc2d1 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_1b_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_1b_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_1b_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_1b_sft.sh index e73e68fb5..68ded905b 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_1b_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_1b_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_241b_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_241b_mpo.sh index a991e81d7..9dffae9b7 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_241b_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_241b_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_241b_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_241b_sft.sh index c5025e530..92171fd73 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_241b_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_241b_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_2b_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_2b_mpo.sh index 58aa84731..4ae428449 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_2b_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_2b_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_2b_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_2b_sft.sh index 9a3fb8537..7584753f7 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_2b_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_2b_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_30b_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_30b_mpo.sh index 49048a8eb..ae6afb58c 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_30b_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_30b_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_30b_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_30b_sft.sh index 0248a7aba..4815d1334 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_30b_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_30b_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_38b_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_38b_mpo.sh index c4efcae6d..86c67558f 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_38b_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_38b_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_38b_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_38b_sft.sh index 80192747e..44ffa8fcb 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_38b_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_38b_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_4b_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_4b_mpo.sh index d49ba4739..6994a4e0d 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_4b_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_4b_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_4b_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_4b_sft.sh index 1264e34b0..538bfe0ed 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_4b_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_4b_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_8b_mpo.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_8b_mpo.sh index 478c81cdb..f86b64a4b 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_8b_mpo.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_8b_mpo.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_mpo.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \ diff --git a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_8b_sft.sh b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_8b_sft.sh index f54674ae9..4aa98c8d6 100644 --- a/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_8b_sft.sh +++ b/internvl_chat_gpt_oss/shell/internvl3_5_qwen3/internvl3_5_8b_sft.sh @@ -78,7 +78,7 @@ internvl/train/internvl_chat_finetune.py \ --group_by_length False \ --dynamic_image_size True \ --use_thumbnail True \ - --ps_version 'v2' \ + --pixel_unshuffle_version 'v2' \ --use_custom_flash_attn False \ --report_to "tensorboard" \ --deepspeed "zero_stage3_config.json" \