@@ -42,12 +42,13 @@ def unroll_model_tp_dict(model_tp_dict):
model_tp_dict = {
    "Qwen/Qwen2-VL-7B-Instruct": [1, 2],
    "Qwen/Qwen2-VL-72B-Instruct": [1, 2],
-    "Qwen/Qwen2.5-VL-72B-Instruct": [1, 2]
+    "Qwen/Qwen2.5-VL-72B-Instruct": [1, 2],
+    "zai-org/GLM-4.1V-9B-Thinking": [1, 2],
}

# https://github.com/pytorch/pytorch/blob/main/torch/testing/_comparison.py#L1317
dtype_atol_rtol_list = [
-    [torch.bfloat16, 1e-5, 1.6e-2],
+    [torch.bfloat16, 1e-2, 1.6e-2],
]

num_tokens_list = [11, 8192]
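For context, `unroll_model_tp_dict` (named in the hunk header above but not changed here) presumably flattens this mapping into `(model_name, tp_size)` pairs for `pytest.mark.parametrize`. A minimal sketch of that assumed behavior:

```python
# Assumed behavior, not shown in this diff: expand {model: [tp_sizes]}
# into (model_name, tp_size) tuples for pytest parametrization.
def unroll_model_tp_dict(model_tp_dict):
    return [(model_name, tp_size)
            for model_name, tp_sizes in model_tp_dict.items()
            for tp_size in tp_sizes]

# {"zai-org/GLM-4.1V-9B-Thinking": [1, 2]} ->
# [("zai-org/GLM-4.1V-9B-Thinking", 1), ("zai-org/GLM-4.1V-9B-Thinking", 2)]
```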
@@ -73,10 +74,12 @@ def test_mrope(model_name, tp_size, dtype, atol, rtol, num_tokens):

    rope_theta = config.rope_theta
    max_position = config.max_position_embeddings
+    partial_rotary_factor = getattr(config, "partial_rotary_factor", 1.0)
+    rotary_dim = int(head_dim * partial_rotary_factor)

    mrope_helper_class = get_rope(
        head_size=head_dim,
-        rotary_dim=head_dim,
+        rotary_dim=rotary_dim,
        max_position=max_position,
        base=rope_theta,
        is_neox_style=is_neox_style,
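The new `rotary_dim` plumbing is what lets the GLM-4.1V entry pass: its text config is assumed to carry a `partial_rotary_factor` (e.g. 0.5), so only that fraction of each head's dimensions receives rotary embedding, while the Qwen2-VL configs lack the attribute and keep full rotation via the `getattr` default of 1.0. A small worked example with illustrative numbers:

```python
head_dim = 128               # illustrative head size
partial_rotary_factor = 0.5  # assumed GLM-4.1V-style value; Qwen2-VL falls back to 1.0
rotary_dim = int(head_dim * partial_rotary_factor)
assert rotary_dim == 64      # only the first 64 dims per head are rotated
```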
@@ -110,7 +113,10 @@ def test_mrope(model_name, tp_size, dtype, atol, rtol, num_tokens):
                    reason="Skipping CUDA/ROCm only tests.")
@pytest.mark.parametrize(
    "model_name, tp_size",
-    unroll_model_tp_dict({"Qwen/Qwen2-VL-7B-Instruct": [1, 2]}))
+    unroll_model_tp_dict({
+        "Qwen/Qwen2-VL-7B-Instruct": [1, 2],
+        "zai-org/GLM-4.1V-9B-Thinking": [1, 2]
+    }))
@pytest.mark.parametrize("dtype, atol, rtol", dtype_atol_rtol_list)
@pytest.mark.parametrize("num_tokens", [4])
def test_mrope_torch_compile_tracing(model_name, tp_size, dtype, atol, rtol,
@@ -126,10 +132,12 @@ def test_mrope_torch_compile_tracing(model_name, tp_size, dtype, atol, rtol,
    is_neox_style = True
    rope_theta = config.rope_theta
    max_position = config.max_position_embeddings
+    partial_rotary_factor = getattr(config, "partial_rotary_factor", 1.0)
+    rotary_dim = int(head_dim * partial_rotary_factor)

    mrope_helper_class = get_rope(
        head_size=head_dim,
-        rotary_dim=head_dim,
+        rotary_dim=rotary_dim,
        max_position=max_position,
        base=rope_theta,
        is_neox_style=is_neox_style,
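For reference, the comparison this tracing test presumably performs looks roughly like the sketch below: run the same computation eagerly and through `torch.compile`, then compare with the parametrized tolerances (the `rotate` function is a hypothetical stand-in, not the vLLM helper):

```python
import torch

def rotate(q):
    # hypothetical stand-in for the rope forward pass
    angles = torch.arange(q.shape[-1], dtype=torch.float32)
    return q * torch.cos(angles).to(q.dtype)

compiled_rotate = torch.compile(rotate, fullgraph=True)
q = torch.randn(4, 128, dtype=torch.bfloat16)
torch.testing.assert_close(compiled_rotate(q), rotate(q), atol=1e-2, rtol=1.6e-2)
```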
@@ -145,7 +153,7 @@ def test_mrope_torch_compile_tracing(model_name, tp_size, dtype, atol, rtol,
    # Create a wrapper that makes the in-place function appear functional
    def functional_forward_cuda(pos, q, k):
        """Wrapper that converts in-place operation to functional style
-
+
        CUDA Graph does not support in-place operations.
        This wrapper creates working copies of the
        input tensors and modifies them.
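A minimal sketch of the wrapper idea that docstring describes, assuming `mrope_helper_class` is the `get_rope(...)` result from earlier in the test and that its `forward_cuda(positions, query, key)` rotates the tensors in place (the exact call in the real test may differ):

```python
    def functional_forward_cuda(pos, q, k):
        # clone so the in-place rotation never mutates the traced graph's inputs
        q_out, k_out = q.clone(), k.clone()
        mrope_helper_class.forward_cuda(pos, q_out, k_out)  # assumed in-place signature
        return q_out, k_out
```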