Skip to content

When I load InternVL3-1B in the verl framework, I am told to "use trust_remote_code=True" and "pass trust=True" #1244

@yyy195

Description

@yyy195

Dear authors,
When I didn't use "trust_remote_code=True", it reminded me to use it. But when I did use it, I got the following error:

Error executing job with overrides: ['algorithm.adv_estimator=grpo', 'data.trust_remote_code=True', 'data.train_files=/data1/yyy25/datasets/geo3k/train.parquet', 'data.val_files=/data1/yyy25/datasets/geo3k/test.parquet', 'data.train_batch_size=128', 'data.max_prompt_length=1024', 'data.max_response_length=1536', 'data.filter_overlong_prompts=True', 'data.truncation=error', 'data.image_key=images', 'actor_rollout_ref.model.path=/data1/yyy25/datasets/InternVL3-1B', 'actor_rollout_ref.actor.optim.lr=1e-6', 'actor_rollout_ref.model.use_remove_padding=True', 'actor_rollout_ref.actor.ppo_mini_batch_size=64', 'actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8', 'actor_rollout_ref.actor.use_kl_loss=True', 'actor_rollout_ref.actor.kl_loss_coef=0.01', 'actor_rollout_ref.actor.kl_loss_type=low_var_kl', 'actor_rollout_ref.actor.entropy_coeff=0', 'actor_rollout_ref.model.enable_gradient_checkpointing=True', 'actor_rollout_ref.actor.fsdp_config.param_offload=False', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=False', 'actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16', 'actor_rollout_ref.rollout.tensor_model_parallel_size=2', 'actor_rollout_ref.rollout.name=vllm', 'actor_rollout_ref.rollout.engine_kwargs.vllm.disable_mm_preprocessor_cache=True', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.6', 'actor_rollout_ref.rollout.enable_chunked_prefill=False', 'actor_rollout_ref.rollout.enforce_eager=False', 'actor_rollout_ref.rollout.free_cache_engine=True', 'actor_rollout_ref.rollout.n=4', 'actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=20', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'algorithm.use_kl_in_reward=False', 'trainer.critic_warmup=0', 'trainer.logger=[console,wandb]', 'trainer.project_name=verl_grpo_example_geo3k', 'trainer.experiment_name=qwen2_5_vl_7b_function_rm', 'trainer.n_gpus_per_node=2', 'trainer.nnodes=1', 'trainer.save_freq=20', 'trainer.test_freq=5', 'trainer.total_epochs=15']
Traceback (most recent call last):
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/data1/yyy25/verl-internvl/verl/trainer/main_ppo.py", line 289, in
main()
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/main.py", line 94, in decorated_main
_run_hydra(
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
_run_app(
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/_internal/utils.py", line 457, in _run_app
run_and_report(
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
raise ex
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
return func()
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/_internal/utils.py", line 458, in
lambda: hydra.run(
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/_internal/hydra.py", line 132, in run
_ = ret.return_value
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/core/utils.py", line 260, in return_value
raise self._return_value
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/hydra/core/utils.py", line 186, in run_job
ret.return_value = task_function(task_cfg)
File "/data1/yyy25/verl-internvl/verl/trainer/main_ppo.py", line 31, in main
run_ppo(config)
File "/data1/yyy25/verl-internvl/verl/trainer/main_ppo.py", line 64, in run_ppo
ray.get(runner.run.remote(config))
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
return fn(*args, **kwargs)
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/ray/_private/worker.py", line 2822, in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/ray/_private/worker.py", line 930, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::TaskRunner.run() (pid=3845197, ip=10.103.13.91, actor_id=214531f2ad269345a3be17bf01000000, repr=<main_ppo.TaskRunner object at 0x7fbc3e6676a0>)
File "/data1/yyy25/verl-internvl/verl/trainer/main_ppo.py", line 212, in run
trainer.init_workers()
File "/data1/yyy25/verl-internvl/verl/trainer/ppo/ray_trainer.py", line 883, in init_workers
self.ref_policy_wg.init_model()
File "/data1/yyy25/verl-internvl/verl/single_controller/ray/base.py", line 51, in call
output = ray.get(output)
ray.exceptions.RayTaskError(ValueError): ray::WorkerDict.ref_init_model() (pid=3852278, ip=10.103.13.91, actor_id=33d0ff4be57f360c22d46b9f01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fb61a4245b0>)
File "/data1/yyy25/verl-internvl/verl/single_controller/ray/base.py", line 710, in func
return getattr(self.worker_dict[key], name)(*args, **kwargs)
File "/data1/yyy25/verl-internvl/verl/single_controller/base/decorator.py", line 549, in inner
return func(*args, **kwargs)
File "/data1/yyy25/verl-internvl/verl/workers/fsdp_workers.py", line 654, in init_model
self.ref_module_fsdp = self._build_model_optimizer(
File "/data1/yyy25/verl-internvl/verl/workers/fsdp_workers.py", line 237, in _build_model_optimizer
self.tokenizer = hf_tokenizer(local_path, trust_remote_code=trust_remote_code)
File "/data1/yyy25/verl-internvl/verl/utils/tokenizer.py", line 64, in hf_tokenizer
config = AutoConfig.from_pretrained(name_or_path, trust_remote_code=trust_remote_code)
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py", line 1156, in from_pretrained
trust_remote_code = resolve_trust_remote_code(
File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/transformers/dynamic_module_utils.py", line 731, in resolve_trust_remote_code
raise ValueError(
ValueError: The repository /data1/yyy25/datasets/InternVL3-1B contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co//data1/yyy25/datasets/InternVL3-1B.
Please pass the argument trust_remote_code=True to allow custom code to be run.
(TaskRunner pid=3845197) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.ref_init_model() (pid=3851024, ip=10.103.13.91, actor_id=e26ca310b1e1ea5900c441f501000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7f736b4b46d0>)
(TaskRunner pid=3845197) File "/data1/yyy25/verl-internvl/verl/single_controller/ray/base.py", line 710, in func
(TaskRunner pid=3845197) return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=3845197) File "/data1/yyy25/verl-internvl/verl/single_controller/base/decorator.py", line 549, in inner
(TaskRunner pid=3845197) return func(*args, **kwargs)
(TaskRunner pid=3845197) File "/data1/yyy25/verl-internvl/verl/workers/fsdp_workers.py", line 654, in init_model
(TaskRunner pid=3845197) self.ref_module_fsdp = self._build_model_optimizer(
(TaskRunner pid=3845197) File "/data1/yyy25/verl-internvl/verl/workers/fsdp_workers.py", line 237, in _build_model_optimizer
(TaskRunner pid=3845197) self.tokenizer = hf_tokenizer(local_path, trust_remote_code=trust_remote_code)
(TaskRunner pid=3845197) File "/data1/yyy25/verl-internvl/verl/utils/tokenizer.py", line 64, in hf_tokenizer
(TaskRunner pid=3845197) config = AutoConfig.from_pretrained(name_or_path, trust_remote_code=trust_remote_code)
(TaskRunner pid=3845197) File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py", line 1156, in from_pretrained
(TaskRunner pid=3845197) trust_remote_code = resolve_trust_remote_code(
(TaskRunner pid=3845197) File "/home/yyy25/miniconda3/envs/azr/lib/python3.10/site-packages/transformers/dynamic_module_utils.py", line 731, in resolve_trust_remote_code
(TaskRunner pid=3845197) raise ValueError(
(TaskRunner pid=3845197) ValueError: The repository /data1/yyy25/datasets/InternVL3-1B contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co//data1/yyy25/datasets/InternVL3-1B.
(TaskRunner pid=3845197) Please pass the argument trust_remote_code=True to allow custom code to be run.

Do you have any ideas about how to solve this problem? Thank you.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions