-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Description
When installing TRL with pip install git+https://github.com/huggingface/trl.git@bd5307e9ecca6b0985381499250f589dab091605, GRPO training with vLLM 0.17.0 fails with "ValueError: There is no module or parameter named 'model' in Qwen3_5ForConditionalGeneration. The available parameters belonging to (Qwen3_5ForConditionalGeneration) are: {'visual.blocks.3.norm2.bias' .............". This happens with both vllm_mode="colocate" and vllm_mode="server".
Reproduction
import torch
import re
import os
import ast
from rapidfuzz import fuzz
from datasets import load_dataset, Dataset
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig # <--- NEW IMPORT
)
from peft import (
LoraConfig,
get_peft_model,
prepare_model_for_kbit_training # <--- NEW IMPORT
)
from trl import GRPOConfig, GRPOTrainer
from transformers import TrainerCallback
import gc
from transformers import TrainerCallback
class NuclearCacheClearCallback(TrainerCallback):
    """Aggressively reclaim memory after every optimizer step.

    Runs Python garbage collection and then empties the CUDA cache at the
    end of each training step, so GPU memory freed by dropped references is
    handed back to the driver instead of lingering in the allocator.
    """

    def on_step_end(self, args, state, control, **kwargs):
        # Collect orphaned Python objects first so their GPU tensors become
        # unreferenced, then release the cached blocks back to the device.
        gc.collect()
        torch.cuda.empty_cache()
# --- 1. CONFIGURATION ---
MODEL_NAME = "Qwen/Qwen3.5-0.8B"  # base checkpoint pulled from the Hugging Face hub
# We set this slightly lower than the physical max (70k) to be safe
MAX_PROMPT_LENGTH = 20000  # prompts longer than this (in tokens) are filtered out below
MAX_COMPLETION_LENGTH = 5000  # generation budget handed to GRPOConfig
LORA_RANK = 32  # LoRA rank r; lora_alpha is derived as 2*r in the LoraConfig below
OUTPUT_DIR = "outputs_qwen_70k_4bit" # Updated output dir name for clarity
# --- 2. REWARD FUNCTIONS ---
def get_completion_text(completion):
    """Normalize a completion to plain text.

    Accepts a bare string, a chat-style message list (returns the last
    message's "content"), or a list of strings (joined); anything else is
    stringified with str().
    """
    if isinstance(completion, str):
        return completion
    if isinstance(completion, list) and completion:
        last = completion[-1]
        # Chat format: [{"role": ..., "content": ...}, ...] -> last content.
        if isinstance(last, dict) and "content" in last:
            return last["content"]
        # List of raw text chunks -> concatenate.
        if isinstance(completion[0], str):
            return "".join(completion)
    return str(completion)
def extract_answer(text):
    """Return the stripped contents of the first <answer>...</answer> block.

    <think>...</think> sections are stripped first so an answer tag nested
    inside a reasoning trace is not picked up. Returns None when no answer
    block exists or the input is not string-like.
    """
    try:
        # Drop reasoning traces before searching for the answer block.
        cleaned_text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
        extracted_data = re.findall(r'<answer>(.*?)</answer>', cleaned_text, flags=re.DOTALL)
        return extracted_data[0].strip() if extracted_data else None
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; Exception keeps the intended "bad input -> None" behavior.
        return None
def calculate_robust_rl_reward(prompts, completions, answer, **kwargs):
    """Score each completion's extracted answer list against the ground truth.

    Scoring per sample:
      -1.0  no <answer> block could be extracted
      -2.0  extracted text is not a valid Python literal, is not a list/tuple,
            or has more than 5 items
      +1.0  empty ground truth matched by an empty prediction (-1.0 otherwise)
      else  1.0 + 0.8 * (matches / len(gt_list)) when at least one prediction
            fuzzy-matches (token_set_ratio >= 90), otherwise -0.5
    """
    responses = [get_completion_text(c) for c in completions]
    extracted_responses = [extract_answer(r) for r in responses]
    score_list = []
    for extracted, gt_list in zip(extracted_responses, answer):
        if not extracted:
            score_list.append(-1.0)
            continue
        try:
            generated_list = ast.literal_eval(extracted)
        except (ValueError, SyntaxError, MemoryError, RecursionError):
            # These are the exceptions literal_eval documents for malformed or
            # oversized input; the previous bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit.
            score_list.append(-2.0)
            continue
        # FIX: literal_eval can return a non-sequence literal (e.g. "5" -> int),
        # on which the original's len() call raised an uncaught TypeError and
        # crashed training. Treat anything but a list/tuple as invalid output.
        if not isinstance(generated_list, (list, tuple)) or len(generated_list) > 5:
            score_list.append(-2.0)
            continue
        if not gt_list:
            score_list.append(1.0 if not generated_list else -1.0)
        else:
            matches = 0
            for gen in generated_list:
                # Skip anything that isn't a string (e.g. the Ellipsis object).
                if not isinstance(gen, str):
                    continue
                for gt in gt_list:
                    # Safety check for non-string ground-truth entries as well.
                    if not isinstance(gt, str):
                        continue
                    if fuzz.token_set_ratio(gt.lower(), gen.lower()) >= 90:
                        matches += 1
                        break
            # Partial credit scales with the fraction of ground truths matched.
            score_list.append(1.0 + (matches / len(gt_list) * 0.8) if matches > 0 else -0.5)
    return score_list
def strict_format_reward_func(prompts, completions, **kwargs):
    """Give a small bonus (0.125) when both <think> and <answer> tags appear."""
    rewards = []
    for completion in completions:
        text = get_completion_text(completion)
        has_both_tags = ("<think>" in text) and ("<answer>" in text)
        rewards.append(0.125 if has_both_tags else 0.0)
    return rewards
def xmlcount_reward_func(prompts, completions, **kwargs):
    """Reward well-formed tag usage.

    Each of the four tags contributes +0.125 when it appears exactly once;
    any other count subtracts 0.125 per occurrence (zero occurrences cost
    nothing, duplicates are penalized per repeat).
    """
    tags = ("<think>", "</think>", "<answer>", "</answer>")
    scores = []
    for completion in completions:
        text = get_completion_text(completion)
        score = 0.0
        for tag in tags:
            occurrences = text.count(tag)
            if occurrences == 1:
                score += 0.125
            else:
                score -= 0.125 * occurrences
        scores.append(score)
    return scores
# --- 3. MODEL & TOKENIZER SETUP (4-BIT QLoRA) ---
print("Loading Model in 4-bit (NF4)...")
# Load Tokenizer first for the dataset logic
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse EOS as the pad token (no dedicated pad token on this tokenizer).
tokenizer.pad_token = tokenizer.eos_token
# Left padding so batched generation continues directly from the prompt.
tokenizer.padding_side = "left"
# Define 4-bit Quantization Configuration
# NOTE(review): bnb_config is built here but never passed to from_pretrained
# below (no quantization_config=...), so the model actually loads in plain
# bf16, not 4-bit — confirm whether that is intentional.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16, # Compute in bf16 for stability
    bnb_4bit_quant_type="nf4", # Normalized Float 4 (standard for QLoRA)
    bnb_4bit_use_double_quant=True # Double quantization saves a bit more memory
)
# Pin this process to its local GPU so multi-process launches don't collide.
local_rank = int(os.environ.get("LOCAL_RANK", 0))
torch.cuda.set_device(local_rank)
# device_map of {"": rank} places the whole model on this rank's GPU.
device_map = {"": local_rank}
print(f"Process Rank {local_rank} is using GPU {device_map} for Training.")
# Load the policy model.
# NOTE(review): quantization_config=bnb_config is NOT passed here, so despite
# the section title this is a plain bf16 load, and the commented-out
# prepare_model_for_kbit_training below is also skipped — confirm intent.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16, # Use pure BF16
    attn_implementation= "sdpa",#"sdpa", #"flash_attention_2",
    device_map=device_map
)
# Prepare model for k-bit training (Important: enables gradient checkpointing and freezes base model)
#model = prepare_model_for_kbit_training(model)
# LoRA Config
peft_config = LoraConfig(
    r=LORA_RANK,
    lora_alpha=LORA_RANK * 2,  # common alpha = 2*r heuristic
    target_modules="all-linear",
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, peft_config)
# Non-reentrant checkpointing avoids issues when inputs don't require grad.
model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": False})
# KV cache must be disabled while gradient checkpointing is active.
model.config.use_cache = False
print(f"Current Attention Implementation: {model.config._attn_implementation}")
# NOTE(review): print_trainable_parameters() prints its table and returns
# None, so this line additionally shows "Trainable Params: None".
print(f"Model Ready. Trainable Params: {model.print_trainable_parameters()}")
# --- 4. DATASET SETUP (ACCURATE TOKEN COUNTING) ---
def get_gsm8k_questions_accurate(split="train") -> Dataset:
    """Build the training dataset with exact token-length filtering.

    Loads the 'negative-data' dataset, renders each row through the chat
    template, drops prompts longer than MAX_PROMPT_LENGTH tokens, shuffles
    deterministically, and returns at most 1300 rows.

    Note: the `split` parameter is accepted for API compatibility but the
    load below always uses split='train'. Uses the module-level `tokenizer`.
    """
    print("Loading and filtering dataset with accurate token counts...")
    raw = load_dataset('negative-data', split='train')

    def render_prompt(row):
        # Wrap the raw input in a minimal system+user conversation.
        chat = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": row['input']},
        ]
        rendered = tokenizer.apply_chat_template(
            chat,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True  # Uncomment if your specific tokenizer version supports this flag
        )
        return {'prompt': rendered, 'answer': row["output"]}

    templated = raw.map(render_prompt)

    def within_limit(row):
        # Exact (slow) token count of the fully rendered prompt.
        token_ids = tokenizer.encode(row['prompt'], add_special_tokens=False)
        return len(token_ids) <= MAX_PROMPT_LENGTH

    kept = templated.filter(within_limit)
    shuffled = kept.shuffle(seed=42)
    # Cap at 1300 rows, or fewer if the filter removed too many.
    final = shuffled.select(range(min(len(shuffled), 1300)))
    print(f"Dataset ready. Original: {len(templated)}, Filtered (<= {MAX_PROMPT_LENGTH}): {len(kept)}, Final: {len(final)}")
    return final
# Build the training set once at module scope (filter + shuffle + cap at 1300).
dataset = get_gsm8k_questions_accurate()
# --- 5. TRAINING CONFIG ---
training_args = GRPOConfig(
    temperature=1,  # neutral sampling; commented alternatives kept for experiments
    # top_p=0.8,
    # repetition_penalty=1.2,
    # top_k=20,
    output_dir=OUTPUT_DIR,
    learning_rate=2e-6,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # effective batch = 4 per device
    gradient_checkpointing=True,
    # Allow slightly more than the filter to avoid edge case crashes
    max_completion_length=MAX_COMPLETION_LENGTH,
    num_generations=4, # Keep low for 70k context stability
    bf16=True,
    fp16=False,
    optim="paged_adamw_8bit",  # paged optimizer to reduce GPU memory spikes
    max_grad_norm=0.1,
    logging_steps=1,
    report_to="wandb",
    save_strategy="steps",
    save_steps=20,
    logging_first_step=True,
    # vLLM Configuration (Multi-GPU Colocate)
    use_vllm=True,
    #vllm_mode="colocate",
    #vllm_device="cuda:0",
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # NOTE(review): with vllm_mode left at its default, these server settings
    # point the trainer at the external `trl vllm-serve` process on port 8000.
    vllm_server_host='0.0.0.0',
    vllm_server_port= 8000,
    vllm_server_timeout= 1200.0,
    #deepspeed="ds_config.json",
    #vllm_gpu_memory_utilization=0.50,
    #torch_empty_cache_steps = 1,
)
# --- 6. TRAINER ---
# Wire the model, tokenizer, reward functions, and filtered dataset into
# GRPOTrainer. `model` is created earlier in the script (not visible here) —
# presumably the PEFT/quantized model from section 1; verify against the top
# of the file. The callback forces gc.collect() + torch.cuda.empty_cache()
# after every optimizer step.
trainer = GRPOTrainer(
    model=model,
    processing_class=tokenizer,  # tokenizer serves as the processing class
    reward_funcs=[xmlcount_reward_func, strict_format_reward_func, calculate_robust_rl_reward],
    args=training_args,
    train_dataset=dataset,
    callbacks=[NuclearCacheClearCallback()]  # aggressive per-step cache clearing
)
print("Starting Multi-GPU GRPO Training...")
trainer.train()

For the vLLM server: CUDA_VISIBLE_DEVICES=0 nohup trl vllm-serve --model Qwen/Qwen3.5-0.8B --tensor_parallel_size 1 --max-model-len 30000 > vllm123.log 2>&1 &
For the TRL training: CUDA_VISIBLE_DEVICES=1 nohup accelerate launch test1.py > output.log 2>&1 &
vLLM server output:
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO: 127.0.0.1:40184 - "GET /health/ HTTP/1.1" 200 OK
INFO: 127.0.0.1:40190 - "GET /get_world_size/ HTTP/1.1" 200 OK
INFO: 127.0.0.1:40198 - "POST /init_communicator/ HTTP/1.1" 200 OK
INFO: 127.0.0.1:40198 - "POST /update_named_param/ HTTP/1.1" 200 OK
INFO: 127.0.0.1:40198 - "POST /update_named_param/ HTTP/1.1" 200 OK
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] Invocation of collective_rpc method failed
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] Traceback (most recent call last):
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/v1/engine/core.py", line 1219, in _invoke_utility_method
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] result = get_result()
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ^^^^^^^^^^^^
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/v1/engine/core.py", line 1200, in <lambda>
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] get_result = lambda: (method := getattr(self, method_name)) and method(
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ^^^^^^^
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/v1/engine/core.py", line 733, in collective_rpc
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] return self.model_executor.collective_rpc(method, timeout, args, kwargs)
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/v1/executor/uniproc_executor.py", line 76, in collective_rpc
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] result = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/v1/serial_utils.py", line 459, in run_method
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] return func(*args, **kwargs)
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/trl/scripts/vllm_serve.py", line 147, in update_named_param
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] self.model_runner.model.load_weights(weights=[(name, weight)])
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/model_executor/models/qwen3_5.py", line 752, in load_weights
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/model_executor/model_loader/reload/torchao_decorator.py", line 50, in patched_model_load_weights
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] return original_load_weights(self, weights, *args, **kwargs)
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/model_executor/models/utils.py", line 340, in load_weights
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] autoloaded_weights = set(self._load_module("", self.module, weights))
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] File "/usr/local/lib/python3.11/dist-packages/vllm/model_executor/models/utils.py", line 324, in _load_module
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] raise ValueError(msg)
(EngineCore_DP0 pid=963) ERROR 03-10 17:16:32 [core.py:1229] ValueError: There is no module or parameter named 'model' in Qwen3_5ForConditionalGeneration. The available parameters belonging to (Qwen3_5ForConditionalGeneration) are: {'visual.blocks.3.norm2.bias', 'language_model.model.layers.5.post_attention_layernorm.weight', 'visual.blocks.5.norm1.weight', 'language_model.model.layers.1.linear_attn.out_proj.weight', 'visual.blocks.0.attn.proj.weight', 'language_model.model.layers.16.linear_attn.in_proj_ba.weight', 'visual.blocks.3.norm1.weight', 'language_model.model.layers.7.self_attn.q_norm.weight', 'language_model.model.layers.2.linear_attn.dt_bias', 'visual.blocks.11.attn.proj.weight', 'visual.merger.norm.weight', 'visual.blocks.6.mlp.linear_fc1.bias', 'language_model.model.layers.22.linear_attn.conv1d.weight', 'language_model.model.layers.16.linear_attn.A_log', 'language_model.model.layers.2.mlp.down_proj.weight', 'language_model.model.layers.20.mlp.down_proj.weight', 'language_model.model.layers.17.linear_attn.in_proj_ba.weight', 'visual.blocks.8.mlp.linear_fc1.weight', 'language_model.model.layers.13.linear_attn.in_proj_qkvz.weight', 'language_model.model.layers.22.linear_attn.out_proj.weight', 'language_model.model.layers.0.post_attention_layernorm.weight', 'visual.blocks.1.norm1.weight', 'visual.blocks.6.norm1.bias', 'language_model.model.layers.13.linear_attn.norm.weight', 'visual.blocks.6.mlp.linear_fc1.weight', 'language_model.model.layers.9.post_attention_layernorm.weight', 'visual.blocks.6.attn.proj.weight', 'visual.blocks.8.attn.proj.bias', 'visual.blocks.9.norm2.bias', 'language_model.model.layers.2.linear_attn.out_proj.weight', 'language_model.model.layers.7.mlp.gate_up_proj.weight', 'language_model.model.layers.22.input_layernorm.weight', 'language_model.model.layers.22.linear_attn.dt_bias', 'language_model.model.layers.14.mlp.gate_up_proj.weight', 'language_model.model.layers.11.self_attn.k_norm.weight', 'visual.blocks.9.attn.qkv.bias', 
'visual.blocks.4.attn.proj.weight', 'visual.blocks.1.norm1.bias', 'visual.blocks.6.attn.proj.bias', 'visual.blocks.0.attn.proj.bias', 'language_model.model.layers.3.self_attn.k_norm.weight', 'language_model.model.layers.19.self_attn.q_norm.weight', 'language_model.model.layers.7.post_attention_layernorm.weight', 'visual.blocks.8.attn.qkv.weight', 'language_model.model.layers.6.input_layernorm.weight', 'language_model.model.layers.3.input_layernorm.weight', 'visual.blocks.2.norm2.weight', 'language_model.model.layers.0.linear_attn.out_proj.weight', 'language_model.model.layers.21.linear_attn.in_proj_qkvz.weight', 'visual.blocks.7.attn.proj.weight', 'visual.blocks.5.mlp.linear_fc1.bias', 'visual.blocks.5.attn.qkv.weight', 'visual.blocks.4.mlp.linear_fc2.weight', 'visual.blocks.10.attn.proj.bias', 'language_model.model.layers.2.post_attention_layernorm.weight', 'language_model.model.layers.20.linear_attn.dt_bias', 'visual.blocks.5.attn.qkv.bias', 'visual.blocks.2.norm1.weight', 'language_model.model.layers.11.mlp.gate_up_proj.weight', 'language_model.model.layers.17.input_layernorm.weight', 'visual.blocks.4.norm2.weight', 'visual.blocks.7.mlp.linear_fc2.weight', 'language_model.model.layers.12.linear_attn.in_proj_ba.weight', 'language_model.model.layers.5.linear_attn.dt_bias', 'visual.blocks.3.norm1.bias', 'language_model.model.layers.2.linear_attn.norm.weight', 'language_model.model.layers.22.linear_attn.in_proj_qkvz.weight', 'visual.blocks.1.attn.proj.weight', 'visual.blocks.2.attn.qkv.weight', 'visual.blocks.7.attn.qkv.weight', 'visual.merger.linear_fc1.weight', 'language_model.model.layers.4.post_attention_layernorm.weight', 'visual.blocks.10.norm1.bias', 'language_model.model.layers.5.mlp.gate_up_proj.weight', 'language_model.model.layers.13.input_layernorm.weight', 'visual.blocks.0.mlp.linear_fc2.bias', 'visual.blocks.3.norm2.weight', 'visual.blocks.1.norm2.bias', 'language_model.model.layers.12.linear_attn.in_proj_qkvz.weight', 
'language_model.model.layers.16.mlp.down_proj.weight', 'language_model.model.layers.14.linear_attn.in_proj_qkvz.weight', 'language_model.model.layers.15.self_attn.qkv_proj.weight', 'visual.merger.linear_fc2.bias', 'visual.blocks.8.mlp.linear_fc2.bias', 'language_model.model.layers.11.self_attn.qkv_proj.weight', 'visual.blocks.2.mlp.linear_fc2.weight', 'visual.blocks.11.mlp.linear_fc2.bias', 'visual.blocks.8.norm2.weight', 'language_model.model.layers.19.mlp.gate_up_proj.weight', 'visual.blocks.9.mlp.linear_fc1.bias', 'language_model.model.layers.16.mlp.gate_up_proj.weight', 'language_model.model.layers.17.post_attention_layernorm.weight', 'visual.blocks.4.attn.proj.bias', 'visual.blocks.2.attn.proj.weight', 'language_model.model.layers.13.linear_attn.dt_bias', 'language_model.model.layers.7.self_attn.qkv_proj.weight', 'language_model.model.layers.6.linear_attn.norm.weight', 'language_model.model.layers.17.linear_attn.in_proj_qkvz.weight', 'visual.blocks.1.mlp.linear_fc1.bias', 'language_model.model.layers.19.self_attn.k_norm.weight', 'visual.blocks.1.mlp.linear_fc2.weight', 'language_model.model.layers.17.mlp.gate_up_proj.weight', 'language_model.model.layers.9.linear_attn.A_log', 'visual.blocks.4.attn.qkv.bias', 'visual.blocks.3.attn.proj.bias', 'language_model.model.layers.4.linear_attn.norm.weight', 'language_model.model.layers.9.linear_attn.in_proj_ba.weight', 'language_model.model.layers.2.mlp.gate_up_proj.weight', 'language_model.model.layers.17.linear_attn.out_proj.weight', 'language_model.model.layers.23.self_attn.qkv_proj.weight', 'language_model.model.layers.9.linear_attn.dt_bias', 'visual.patch_embed.proj.bias', 'language_model.model.layers.4.linear_attn.in_proj_ba.weight', 'language_model.model.layers.12.mlp.gate_up_proj.weight', 'language_model.model.layers.0.linear_attn.A_log', 'language_model.model.layers.4.linear_attn.conv1d.weight', 'visual.blocks.9.mlp.linear_fc1.weight', 'visual.blocks.10.norm1.weight', 
'language_model.model.layers.14.linear_attn.conv1d.weight', 'language_model.model.layers.13.linear_attn.in_proj_ba.weight', 'visual.blocks.4.norm1.weight', 'language_model.model.layers.14.linear_attn.out_proj.weight', 'language_model.model.layers.16.linear_attn.norm.weight', 'language_model.model.layers.4.input_layernorm.weight', 'language_model.model.layers.0.linear_attn.in_proj_qkvz.weight', 'visual.blocks.4.norm2.bias', 'language_model.model.layers.12.linear_attn.out_proj.weight', 'language_model.model.layers.10.mlp.down_proj.weight', 'language_model.model.layers.4.linear_attn.out_proj.weight', 'language_model.model.layers.14.post_attention_layernorm.weight', 'language_model.model.layers.18.linear_attn.in_proj_qkvz.weight', 'visual.blocks.9.mlp.linear_fc2.weight', 'visual.blocks.7.mlp.linear_fc1.bias', 'language_model.model.layers.11.self_attn.q_norm.weight', 'visual.blocks.0.norm1.weight', 'visual.blocks.1.mlp.linear_fc2.bias', 'language_model.model.layers.4.linear_attn.in_proj_qkvz.weight', 'language_model.model.layers.12.linear_attn.dt_bias', 'language_model.model.layers.18.mlp.gate_up_proj.weight', 'language_model.model.layers.6.linear_attn.dt_bias', 'visual.blocks.10.attn.qkv.weight', 'language_model.model.layers.14.input_layernorm.weight', 'language_model.model.layers.13.post_attention_layernorm.weight', 'language_model.model.layers.23.self_attn.o_proj.weight', 'language_model.model.layers.22.linear_attn.in_proj_ba.weight', 'language_model.model.layers.2.linear_attn.in_proj_qkvz.weight', 'visual.blocks.8.norm1.weight', 'language_model.model.layers.8.input_layernorm.weight', 'language_model.model.layers.12.input_layernorm.weight', 'visual.blocks.8.attn.proj.weight', 'visual.blocks.0.norm1.bias', 'language_model.model.layers.20.linear_attn.in_proj_ba.weight', 'language_model.model.layers.20.input_layernorm.weight', 'language_model.model.layers.7.input_layernorm.weight', 'visual.blocks.8.norm2.bias', 
'language_model.model.layers.8.linear_attn.in_proj_ba.weight', 'visual.blocks.0.attn.qkv.weight', 'visual.blocks.5.attn.proj.weight', 'language_model.model.layers.8.mlp.gate_up_proj.weight', 'language_model.model.layers.15.mlp.down_proj.weight', 'language_model.model.layers.16.linear_attn.dt_bias', 'language_model.model.layers.20.post_attention_layernorm.weight', 'language_model.model.layers.16.linear_attn.conv1d.weight', 'language_model.model.layers.1.linear_attn.A_log', 'visual.blocks.4.mlp.linear_fc1.bias', 'language_model.model.layers.8.linear_attn.conv1d.weight', 'language_model.model.layers.8.post_attention_layernorm.weight', 'visual.blocks.1.attn.qkv.bias', 'visual.blocks.6.norm2.weight', 'language_model.model.layers.21.linear_attn.dt_bias', 'language_model.model.layers.2.input_layernorm.weight', 'language_model.model.layers.9.mlp.gate_up_proj.weight', 'language_model.model.layers.12.linear_attn.norm.weight', 'visual.blocks.3.attn.proj.weight', 'language_model.model.layers.16.linear_attn.in_proj_qkvz.weight', 'language_model.model.layers.7.mlp.down_proj.weight', 'language_model.model.layers.22.mlp.gate_up_proj.weight', 'visual.blocks.8.attn.qkv.bias', 'language_model.model.layers.11.post_attention_layernorm.weight', 'language_model.model.layers.6.post_attention_layernorm.weight', 'visual.blocks.1.attn.proj.bias', 'language_model.model.layers.23.post_attention_layernorm.weight', 'language_model.model.layers.4.mlp.down_proj.weight', 'language_model.model.layers.6.linear_attn.in_proj_ba.weight', 'visual.blocks.2.mlp.linear_fc1.bias', 'language_model.model.layers.10.linear_attn.A_log', 'language_model.model.layers.21.mlp.gate_up_proj.weight', 'language_model.model.layers.18.linear_attn.A_log', 'visual.blocks.7.mlp.linear_fc2.bias', 'language_model.model.layers.19.self_attn.qkv_proj.weight', 'visual.blocks.3.attn.qkv.weight', 'visual.blocks.5.norm1.bias', 'language_model.model.layers.6.linear_attn.out_proj.weight', 
'language_model.model.layers.12.linear_attn.A_log', 'language_model.model.layers.2.linear_attn.conv1d.weight', 'language_model.model.layers.3.mlp.gate_up_proj.weight', 'visual.blocks.0.norm2.weight', 'language_model.model.embed_tokens.weight', 'language_model.model.layers.21.linear_attn.conv1d.weight', 'language_model.model.layers.12.post_attention_layernorm.weight', 'visual.pos_embed.weight', 'visual.blocks.11.mlp.linear_fc2.weight', 'language_model.model.layers.1.mlp.gate_up_proj.weight', 'language_model.model.layers.18.mlp.down_proj.weight', 'language_model.model.layers.20.linear_attn.out_proj.weight', 'language_model.model.layers.19.input_layernorm.weight', 'language_model.model.layers.9.input_layernorm.weight', 'language_model.model.layers.13.mlp.down_proj.weight', 'visual.blocks.9.norm1.bias', 'language_model.model.layers.23.self_attn.k_norm.weight', 'visual.blocks.2.norm1.bias', 'visual.blocks.7.norm2.weight', 'visual.blocks.8.mlp.linear_fc2.weight', 'language_model.model.layers.17.linear_attn.A_log', 'language_model.model.layers.18.input_layernorm.weight', 'language_model.model.layers.16.linear_attn.out_proj.weight', 'language_model.model.layers.21.linear_attn.in_proj_ba.weight', 'visual.blocks.5.mlp.linear_fc1.weight', 'visual.blocks.2.attn.proj.bias', 'language_model.model.layers.10.linear_attn.conv1d.weight', 'visual.blocks.5.norm2.weight', 'visual.blocks.3.mlp.linear_fc2.weight', 'visual.blocks.11.norm1.weight', 'language_model.model.layers.11.input_layernorm.weight', 'language_model.model.layers.21.linear_attn.out_proj.weight', 'language_model.model.layers.0.mlp.down_proj.weight', 'language_model.model.layers.1.post_attention_layernorm.weight', 'language_model.model.layers.9.mlp.down_proj.weight', 'visual.blocks.7.attn.proj.bias', 'visual.blocks.5.norm2.bias', 'language_model.model.layers.15.self_attn.o_proj.weight', 'visual.blocks.2.attn.qkv.bias', 'language_model.model.layers.18.post_attention_layernorm.weight', 
'visual.blocks.11.mlp.linear_fc1.bias', 'language_model.model.layers.5.input_layernorm.weight', 'language_model.model.layers.5.linear_attn.norm.weight', 'language_model.model.layers.1.linear_attn.in_proj_ba.weight', 'language_model.model.layers.9.linear_attn.out_proj.weight', 'language_model.model.layers.18.linear_attn.conv1d.weight', 'visual.blocks.4.norm1.bias', 'language_model.model.layers.1.mlp.down_proj.weight', 'visual.blocks.0.attn.qkv.bias', 'visual.blocks.1.mlp.linear_fc1.weight', 'language_model.model.layers.5.linear_attn.out_proj.weight', 'visual.blocks.11.norm2.bias', 'visual.blocks.0.mlp.linear_fc1.weight', 'visual.blocks.10.attn.proj.weight', 'visual.blocks.10.mlp.linear_fc2.weight', 'language_model.model.layers.19.post_attention_layernorm.weight', 'language_model.model.layers.17.linear_attn.norm.weight', 'language_model.model.layers.13.mlp.gate_up_proj.weight', 'visual.blocks.4.mlp.linear_fc2.bias', 'visual.blocks.0.norm2.bias', 'visual.blocks.10.norm2.weight', 'language_model.model.layers.0.linear_attn.dt_bias', 'language_model.model.layers.22.post_attention_layernorm.weight', 'language_model.model.layers.22.linear_attn.A_log', 'language_model.model.layers.10.linear_attn.out_proj.weight', 'language_model.model.layers.0.linear_attn.norm.weight', 'language_model.model.layers.0.mlp.gate_up_proj.weight', 'language_model.model.layers.5.linear_attn.conv1d.weight', 'language_model.model.layers.8.linear_attn.dt_bias', 'language_model.model.layers.22.mlp.down_proj.weight', 'visual.blocks.4.mlp.linear_fc1.weight', 'language_model.model.layers.10.linear_attn.norm.weight', 'language_model.model.layers.6.linear_attn.in_proj_qkvz.weight', 'language_model.model.layers.8.linear_attn.out_proj.weight', 'language_model.model.layers.8.linear_attn.A_log', 'visual.blocks.6.mlp.linear_fc2.weight', 'visual.blocks.1.norm2.weight', 'language_model.model.layers.19.self_attn.o_proj.weight', 'language_model.model.layers.21.input_layernorm.weight', 
'language_model.model.layers.6.linear_attn.A_log', 'visual.blocks.5.attn.proj.bias', 'visual.blocks.7.norm1.bias', 'language_model.model.layers.15.input_layernorm.weight', 'visual.blocks.9.attn.proj.bias', 'visual.blocks.10.mlp.linear_fc1.weight', 'language_model.model.layers.10.post_attention_layernorm.weight', 'language_model.model.layers.0.linear_attn.in_proj_ba.weight', 'language_model.model.layers.20.linear_attn.in_proj_qkvz.weight', 'visual.blocks.2.mlp.linear_fc1.weight', 'visual.blocks.1.attn.qkv.weight', 'language_model.model.layers.15.self_attn.k_norm.weight', 'language_model.model.layers.18.linear_attn.dt_bias', 'visual.blocks.6.norm2.bias', 'language_model.model.layers.1.linear_attn.dt_bias', 'language_model.model.layers.9.linear_attn.conv1d.weight', 'language_model.model.layers.17.mlp.down_proj.weight', 'language_model.model.layers.1.linear_attn.conv1d.weight', 'language_model.model.layers.22.linear_attn.norm.weight', 'language_model.model.layers.20.mlp.gate_up_proj.weight', 'visual.blocks.9.attn.proj.weight', 'language_model.model.layers.10.linear_attn.in_proj_ba.weight', 'visual.blocks.6.norm1.weight', 'language_model.model.layers.4.linear_attn.A_log', 'language_model.model.layers.2.linear_attn.in_proj_ba.weight', 'language_model.model.layers.2.linear_attn.A_log', 'visual.blocks.9.norm2.weight', 'language_model.model.layers.0.input_layernorm.weight', 'language_model.model.layers.10.linear_attn.in_proj_qkvz.weight', 'visual.blocks.2.norm2.bias', 'language_model.model.layers.1.input_layernorm.weight', 'visual.blocks.9.mlp.linear_fc2.bias', 'visual.blocks.11.norm1.bias', 'language_model.model.layers.13.linear_attn.out_proj.weight', 'language_model.model.layers.17.linear_attn.dt_bias', 'language_model.model.layers.11.self_attn.o_proj.weight', 'language_model.model.norm.weight', 'language_model.model.layers.9.linear_attn.in_proj_qkvz.weight', 'visual.blocks.9.norm1.weight', 'language_model.model.layers.6.linear_attn.conv1d.weight', 
'visual.blocks.5.mlp.linear_fc2.bias', 'language_model.model.layers.3.self_attn.o_proj.weight', 'language_model.model.layers.20.linear_attn.conv1d.weight', 'visual.blocks.7.mlp.linear_fc1.weight', 'visual.blocks.4.attn.qkv.weight', 'language_model.model.layers.3.mlp.down_proj.weight', 'visual.blocks.7.norm2.bias', 'visual.blocks.9.attn.qkv.weight', 'language_model.model.layers.7.self_attn.o_proj.weight', 'visual.blocks.3.attn.qkv.bias', 'language_model.model.layers.5.mlp.down_proj.weight', 'language_model.model.layers.15.self_attn.q_norm.weight', 'language_model.model.layers.3.self_attn.q_norm.weight', 'language_model.model.layers.20.linear_attn.norm.weight', 'language_model.model.layers.0.linear_attn.conv1d.weight', 'language_model.model.layers.8.mlp.down_proj.weight', 'visual.blocks.0.mlp.linear_fc2.weight', 'language_model.model.layers.1.linear_attn.norm.weight', 'language_model.model.layers.18.linear_attn.norm.weight', 'language_model.model.layers.15.post_attention_layernorm.weight', 'language_model.model.layers.8.linear_attn.norm.weight', 'language_model.model.layers.21.linear_attn.A_log', 'language_model.model.layers.13.linear_attn.conv1d.weight', 'visual.patch_embed.proj.weight', 'visual.blocks.3.mlp.linear_fc1.bias', 'language_model.model.layers.9.linear_attn.norm.weight', 'language_model.model.layers.3.post_attention_layernorm.weight', 'language_model.model.layers.11.mlp.down_proj.weight', 'language_model.model.layers.14.mlp.down_proj.weight', 'language_model.model.layers.23.mlp.down_proj.weight', 'language_model.model.layers.1.linear_attn.in_proj_qkvz.weight', 'visual.blocks.8.norm1.bias', 'visual.blocks.10.mlp.linear_fc2.bias', 'language_model.model.layers.16.post_attention_layernorm.weight', 'language_model.model.layers.6.mlp.down_proj.weight', 'visual.blocks.7.norm1.weight', 'visual.blocks.6.mlp.linear_fc2.bias', 'language_model.model.layers.14.linear_attn.A_log', 'language_model.model.layers.23.input_layernorm.weight', 
'visual.blocks.3.mlp.linear_fc2.bias', 'visual.blocks.3.mlp.linear_fc1.weight', 'language_model.model.layers.21.linear_attn.norm.weight', 'language_model.model.layers.18.linear_attn.out_proj.weight', 'language_model.model.layers.16.input_layernorm.weight', 'visual.blocks.6.attn.qkv.weight', 'language_model.model.layers.4.mlp.gate_up_proj.weight', 'language_model.model.layers.15.mlp.gate_up_proj.weight', 'language_model.model.layers.12.linear_attn.conv1d.weight', 'visual.merger.linear_fc1.bias', 'language_model.model.layers.19.mlp.down_proj.weight', 'visual.blocks.8.mlp.linear_fc1.bias', 'language_model.model.layers.18.linear_attn.in_proj_ba.weight', 'visual.blocks.7.attn.qkv.bias', 'language_model.model.layers.6.mlp.gate_up_proj.weight', 'visual.blocks.10.mlp.linear_fc1.bias', 'language_model.model.layers.8.linear_attn.in_proj_qkvz.weight', 'language_model.model.layers.7.self_attn.k_norm.weight', 'language_model.model.layers.5.linear_attn.A_log', 'visual.blocks.2.mlp.linear_fc2.bias', 'language_model.model.layers.10.linear_attn.dt_bias', 'visual.blocks.0.mlp.linear_fc1.bias', 'language_model.model.layers.12.mlp.down_proj.weight', 'language_model.model.layers.3.self_attn.qkv_proj.weight', 'language_model.model.layers.14.linear_attn.in_proj_ba.weight', 'language_model.model.layers.5.linear_attn.in_proj_qkvz.weight', 'language_model.model.layers.4.linear_attn.dt_bias', 'visual.blocks.5.mlp.linear_fc2.weight', 'visual.blocks.11.attn.proj.bias', 'language_model.model.layers.21.mlp.down_proj.weight', 'visual.blocks.6.attn.qkv.bias', 'language_model.model.layers.10.mlp.gate_up_proj.weight', 'language_model.model.layers.14.linear_attn.norm.weight', 'visual.blocks.11.norm2.weight', 'language_model.model.layers.5.linear_attn.in_proj_ba.weight', 'language_model.model.layers.23.mlp.gate_up_proj.weight', 'visual.blocks.10.norm2.bias', 'visual.blocks.11.attn.qkv.bias', 'visual.merger.norm.bias', 'visual.blocks.11.attn.qkv.weight', 'visual.blocks.10.attn.qkv.bias', 
'language_model.model.layers.14.linear_attn.dt_bias', 'language_model.model.layers.23.self_attn.q_norm.weight', 'language_model.model.layers.20.linear_attn.A_log', 'language_model.model.layers.17.linear_attn.conv1d.weight', 'visual.merger.linear_fc2.weight', 'visual.blocks.11.mlp.linear_fc1.weight', 'language_model.model.layers.10.input_layernorm.weight', 'language_model.model.layers.21.post_attention_layernorm.weight', 'language_model.model.layers.13.linear_attn.A_log'}
System Info
trl : pip install git+https://github.com/huggingface/trl.git@bd5307e9ecca6b0985381499250f589dab091605
Package Version
accelerate 1.13.0
aiohappyeyeballs 2.6.1
aiohttp 3.13.3
aiosignal 1.4.0
annotated-doc 0.0.4
annotated-types 0.7.0
anthropic 0.84.0
anyio 4.12.1
apache-tvm-ffi 0.1.9
argon2-cffi 23.1.0
argon2-cffi-bindings 21.2.0
arrow 1.3.0
astor 0.8.1
asttokens 2.4.1
async-lru 2.0.4
attrs 24.2.0
babel 2.16.0
beautifulsoup4 4.12.3
bitsandbytes 0.49.2
blake3 1.0.8
bleach 6.1.0
blinker 1.4
cachetools 7.0.5
causal_conv1d 1.6.1
cbor2 5.8.0
certifi 2024.8.30
cffi 1.17.1
charset-normalizer 3.3.2
click 8.3.1
cloudpickle 3.1.2
comm 0.2.2
compressed-tensors 0.13.0
cryptography 3.4.8
cuda-bindings 12.9.4
cuda-pathfinder 1.4.1
cuda-python 12.9.4
cupy-cuda12x 14.0.1
datasets 4.7.0
dbus-python 1.2.18
debugpy 1.8.5
decorator 5.1.1
defusedxml 0.7.1
depyf 0.20.0
dill 0.4.0
diskcache 5.6.3
distro 1.7.0
dnspython 2.8.0
docstring_parser 0.17.0
einops 0.8.2
email-validator 2.3.0
entrypoints 0.4
executing 2.1.0
fastapi 0.135.1
fastapi-cli 0.0.24
fastapi-cloud-cli 0.14.1
fastar 0.8.0
fastjsonschema 2.20.0
filelock 3.25.1
fla-core 0.4.1
flash_attn 2.8.3
flash-linear-attention 0.4.1
flashinfer-python 0.6.4
fqdn 1.5.1
frozenlist 1.8.0
fsspec 2024.2.0
gguf 0.18.0
gitdb 4.0.12
GitPython 3.1.46
googleapis-common-protos 1.73.0
grpcio 1.78.0
grpcio-reflection 1.78.0
h11 0.14.0
hf-xet 1.3.2
httpcore 1.0.5
httplib2 0.20.2
httptools 0.7.1
httpx 0.27.2
httpx-sse 0.4.3
huggingface_hub 1.6.0
idna 3.10
ijson 3.5.0
importlib_metadata 8.7.1
interegular 0.3.3
ipykernel 6.29.5
ipython 8.27.0
ipython-genutils 0.2.0
ipywidgets 8.1.5
isoduration 20.11.0
jedi 0.19.1
jeepney 0.7.1
Jinja2 3.1.6
jiter 0.13.0
jmespath 1.1.0
json5 0.9.25
jsonpointer 3.0.0
jsonschema 4.23.0
jsonschema-specifications 2023.12.1
jupyter-archive 3.4.0
jupyter_client 7.4.9
jupyter_contrib_core 0.4.2
jupyter_contrib_nbextensions 0.7.0
jupyter_core 5.7.2
jupyter-events 0.10.0
jupyter-highlight-selected-word 0.2.0
jupyter-lsp 2.2.5
jupyter_nbextensions_configurator 0.6.4
jupyter_server 2.14.2
jupyter_server_terminals 0.5.3
jupyterlab 4.2.5
jupyterlab_pygments 0.3.0
jupyterlab_server 2.27.3
jupyterlab_widgets 3.0.13
kaldi-native-fbank 1.22.3
keyring 23.5.0
lark 1.2.2
launchpadlib 1.10.16
lazr.restfulclient 0.14.4
lazr.uri 1.0.6
llguidance 1.3.0
llvmlite 0.44.0
lm-format-enforcer 0.11.3
loguru 0.7.3
lxml 5.3.0
markdown-it-py 4.0.0
MarkupSafe 2.1.5
matplotlib-inline 0.1.7
mcp 1.26.0
mdurl 0.1.2
mistral_common 1.9.1
mistune 3.0.2
model-hosting-container-standards 0.1.13
more-itertools 8.10.0
mpmath 1.3.0
msgpack 1.1.2
msgspec 0.20.0
multidict 6.7.1
multiprocess 0.70.18
nbclassic 1.1.0
nbclient 0.10.0
nbconvert 7.16.4
nbformat 5.10.4
nest-asyncio 1.6.0
networkx 3.2.1
ninja 1.13.0
notebook 6.5.5
notebook_shim 0.2.4
numba 0.61.2
numpy 2.2.6
nvidia-cublas-cu12 12.8.4.1
nvidia-cuda-cupti-cu12 12.8.90
nvidia-cuda-nvrtc-cu12 12.8.93
nvidia-cuda-runtime-cu12 12.8.90
nvidia-cudnn-cu12 9.10.2.21
nvidia-cudnn-frontend 1.19.0
nvidia-cufft-cu12 11.3.3.83
nvidia-cufile-cu12 1.13.1.3
nvidia-curand-cu12 10.3.9.90
nvidia-cusolver-cu12 11.7.3.90
nvidia-cusparse-cu12 12.5.8.93
nvidia-cusparselt-cu12 0.7.1
nvidia-cutlass-dsl 4.4.1
nvidia-cutlass-dsl-libs-base 4.4.1
nvidia-ml-py 13.590.48
nvidia-nccl-cu12 2.27.5
nvidia-nvjitlink-cu12 12.8.93
nvidia-nvshmem-cu12 3.4.5
nvidia-nvtx-cu12 12.8.90
oauthlib 3.2.0
openai 2.24.0
openai-harmony 0.0.8
opencv-python-headless 4.13.0.92
opentelemetry-api 1.40.0
opentelemetry-exporter-otlp 1.40.0
opentelemetry-exporter-otlp-proto-common 1.40.0
opentelemetry-exporter-otlp-proto-grpc 1.40.0
opentelemetry-exporter-otlp-proto-http 1.40.0
opentelemetry-proto 1.40.0
opentelemetry-sdk 1.40.0
opentelemetry-semantic-conventions 0.61b0
opentelemetry-semantic-conventions-ai 0.4.15
outlines_core 0.2.11
overrides 7.7.0
packaging 26.0
pandas 3.0.1
pandocfilters 1.5.1
parso 0.8.4
partial-json-parser 0.2.1.1.post7
peft 0.18.1
pexpect 4.9.0
pillow 12.1.1
pip 24.2
platformdirs 4.3.6
prometheus_client 0.21.0
prometheus-fastapi-instrumentator 7.1.0
prompt_toolkit 3.0.47
propcache 0.4.1
protobuf 6.33.5
psutil 6.0.0
ptyprocess 0.7.0
pure_eval 0.2.3
py-cpuinfo 9.0.0
pyarrow 23.0.1
pybase64 1.4.3
pycountry 26.2.16
pycparser 2.22
pydantic 2.12.5
pydantic_core 2.41.5
pydantic-extra-types 2.11.0
pydantic-settings 2.13.1
Pygments 2.18.0
PyGObject 3.42.1
PyJWT 2.11.0
pyparsing 2.4.7
python-apt 2.4.0+ubuntu4
python-dateutil 2.9.0.post0
python-dotenv 1.2.2
python-json-logger 2.0.7
python-multipart 0.0.22
PyYAML 6.0.2
pyzmq 27.1.0
quack-kernels 0.3.2
RapidFuzz 3.14.3
ray 2.54.0
referencing 0.35.1
regex 2026.2.28
requests 2.32.3
rfc3339-validator 0.1.4
rfc3986-validator 0.1.1
rich 14.3.3
rich-toolkit 0.19.7
rignore 0.7.6
rpds-py 0.20.0
safetensors 0.7.0
SecretStorage 3.3.1
Send2Trash 1.8.3
sentencepiece 0.2.1
sentry-sdk 2.54.0
setproctitle 1.3.7
setuptools 75.1.0
shellingham 1.5.4
six 1.16.0
smmap 5.0.3
sniffio 1.3.1
soupsieve 2.6
sse-starlette 3.3.2
stack-data 0.6.3
starlette 0.52.1
supervisor 4.3.0
sympy 1.14.0
tabulate 0.10.0
terminado 0.18.1
tiktoken 0.12.0
tinycss2 1.3.0
tokenizers 0.22.2
torch 2.10.0
torch_c_dlpack_ext 0.1.5
torchaudio 2.10.0
torchvision 0.25.0
tornado 6.4.1
tqdm 4.67.3
traitlets 5.14.3
transformers 5.3.0
triton 3.6.0
trl 1.0.0.dev0
typer 0.24.1
types-python-dateutil 2.9.0.20240906
typing_extensions 4.15.0
typing-inspection 0.4.2
uri-template 1.3.0
urllib3 2.2.3
uvicorn 0.41.0
uvloop 0.22.1
vllm 0.17.0
wadllib 1.3.6
wandb 0.25.0
watchfiles 1.1.1
wcwidth 0.2.13
webcolors 24.8.0
webencodings 0.5.1
websocket-client 1.8.0
websockets 16.0
wheel 0.44.0
widgetsnbextension 4.0.13
xgrammar 0.1.29
xxhash 3.6.0
yarl 1.23.0
zipp 3.23.0
Checklist
- I have checked that my issue isn't already filed (see open issues)
- I have included my system information
- Any code provided is minimal, complete, and reproducible (more on MREs)
- Any code provided is properly formatted in code blocks (no screenshots; more on code blocks)
- Any traceback provided is complete