
Commit 0f433c0

clean code

1 parent ea8f30b

17 files changed: +29 -44 lines


lightllm/common/basemodel/layer_weights/hf_load_utils.py

Lines changed: 1 addition & 1 deletion

@@ -60,7 +60,7 @@ def load_hf_weights(data_type, weight_dir, pre_post_layer=None, transformer_laye
         transformer_layer_list=transformer_layer_list,
         weight_dir=weight_dir,
     )  # noqa
-    worker = int(os.environ.get("LOADWORKER", 16))
+    worker = int(os.environ.get("LOADWORKER", 1))
     with Pool(worker) as p:
         iterator = p.imap_unordered(partial_func, candidate_files, chunksize=1)
         desc_str = f"pid {os.getpid()} Loading model weights with {worker} workers"
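
Note on the change above: weight shards are loaded through a multiprocessing.Pool whose size comes from the LOADWORKER environment variable, and this commit drops the default from 16 workers to 1, so parallel loading becomes opt-in. A minimal sketch of the pattern, where load_one_file, the file names, and the weight path are stand-ins rather than lightllm's actual helpers:

import os
from functools import partial
from multiprocessing import Pool

def load_one_file(path, weight_dir=None):
    # Stand-in: parse one weight shard found under weight_dir.
    return path

candidate_files = ["model-00001.safetensors", "model-00002.safetensors"]
partial_func = partial(load_one_file, weight_dir="/path/to/weights")

# Post-commit default is a single worker; parallelism is opt-in,
# e.g. LOADWORKER=16 on the launching command line.
worker = int(os.environ.get("LOADWORKER", 1))
with Pool(worker) as p:
    for _ in p.imap_unordered(partial_func, candidate_files, chunksize=1):
        pass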

lightllm/common/req_manager.py

Lines changed: 0 additions & 1 deletion

@@ -7,7 +7,6 @@
 from lightllm.common.basemodel.triton_kernel.gen_sampling_params import update_req_to_token_id_counter
 from lightllm.utils.envs_utils import enable_env_vars, get_env_start_args
 from lightllm.utils.config_utils import get_vocab_size
-from lightllm.server.router.dynamic_prompt.hybrid_radix_cache import HybridMemManager

 logger = init_logger(__name__)

lightllm/common/triton_utils/autotune_kernel_configs/triton_3.4.0/NVIDIA_H200/chunk_local_cumsum_scalar/{B=1,BT=64,H=8,IS_VARLEN=true,REVERSE=false}_NVIDIA_H200.json

Lines changed: 3 additions & 0 deletions

@@ -14,6 +14,9 @@
     "16": {
         "num_warps": 4
     },
+    "164096": {
+        "num_warps": 1
+    },
     "2048": {
         "num_warps": 2
     },
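
The JSON files in this and the next four diffs appear to be cached autotune results: per-GPU, per-kernel tables mapping an input-size key (e.g. "16", "2048", or the newly added "164096") to Triton launch parameters such as num_warps. A hypothetical sketch of consuming such a table, assuming exact keys are preferred with nearest-bucket fallback; the real lookup lives in lightllm's Autotuner and may differ, and the filename here is a placeholder for the long signature-keyed paths above:

import json

def pick_config(table: dict, size: int) -> dict:
    # Assumed policy: exact key if present, else the nearest size bucket.
    if str(size) in table:
        return table[str(size)]
    nearest = min((int(k) for k in table), key=lambda k: abs(k - size))
    return table[str(nearest)]

with open("chunk_local_cumsum_scalar.json") as f:  # placeholder filename
    table = json.load(f)

print(pick_config(table, 2048))    # -> {"num_warps": 2}
print(pick_config(table, 164096))  # -> {"num_warps": 1}, the bucket this commit adds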

lightllm/common/triton_utils/autotune_kernel_configs/triton_3.4.0/NVIDIA_H200/fused_gdn_gating:v1/{NUM_HEADS=8,a_dtype=torch.bfloat16}_NVIDIA_H200.json

Lines changed: 4 additions & 0 deletions

@@ -19,6 +19,10 @@
         "BLK_HEADS": 8,
         "num_warps": 2
     },
+    "164096": {
+        "BLK_HEADS": 8,
+        "num_warps": 1
+    },
     "2048": {
         "BLK_HEADS": 16,
         "num_warps": 1

lightllm/common/triton_utils/autotune_kernel_configs/triton_3.4.0/NVIDIA_H200/gated_rmsnorm_forward:v1/{N=128,has_bias=false,weight_dtype=torch.bfloat16,x_dtype=torch.bfloat16}_NVIDIA_H200.json

Lines changed: 4 additions & 0 deletions

@@ -7,6 +7,10 @@
         "BLOCK_N": 256,
         "num_warps": 1
     },
+    "1312768": {
+        "BLOCK_N": 64,
+        "num_warps": 2
+    },
     "16384": {
         "BLOCK_N": 128,
         "num_warps": 1

lightllm/common/triton_utils/autotune_kernel_configs/triton_3.4.0/NVIDIA_H200/moe_align_fused:v1/{topk_num=10}_NVIDIA_H200.json

Lines changed: 4 additions & 0 deletions

@@ -31,6 +31,10 @@
         "BLOCK_SIZE": 128,
         "num_warps": 4
     },
+    "32768": {
+        "BLOCK_SIZE": 256,
+        "num_warps": 8
+    },
     "4096": {
         "BLOCK_SIZE": 128,
         "num_warps": 8

lightllm/common/triton_utils/autotune_kernel_configs/triton_3.4.0/NVIDIA_H200/silu_and_mul_fwd:v1/{N=128,out_dtype=torch.bfloat16}_NVIDIA_H200.json

Lines changed: 6 additions & 0 deletions

@@ -59,6 +59,12 @@
         "NUM_STAGES": 2,
         "num_warps": 4
     },
+    "164096": {
+        "BLOCK_M": 64,
+        "BLOCK_N": 128,
+        "NUM_STAGES": 4,
+        "num_warps": 1
+    },
     "2048": {
         "BLOCK_M": 1,
         "BLOCK_N": 256,

lightllm/common/triton_utils/autotuner.py

Lines changed: 1 addition & 1 deletion

@@ -62,7 +62,7 @@ def autotune(
        as needed before invocation.
    """

-    def decorator(fn):
+    def decorator(fn: Callable) -> Callable:
        return Autotuner(
            fn=fn,
            kernel_name=kernel_name,
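
The only change here is annotating the inner decorator with Callable, the idiomatic type for a decorator that wraps a function in a callable object. A reduced sketch of the pattern, with Autotuner cut down to a stand-in wrapper (the real class takes more arguments, as the kernel_name=... line above suggests):

from typing import Callable

class Autotuner:
    # Stand-in: the real Autotuner also carries tuning-space and
    # cached-config state, and swaps in tuned launch parameters.
    def __init__(self, fn: Callable, kernel_name: str):
        self.fn = fn
        self.kernel_name = kernel_name

    def __call__(self, *args, **kwargs):
        return self.fn(*args, **kwargs)

def autotune(kernel_name: str) -> Callable:
    def decorator(fn: Callable) -> Callable:
        return Autotuner(fn=fn, kernel_name=kernel_name)
    return decorator

@autotune(kernel_name="silu_and_mul_fwd:v1")
def silu_and_mul_fwd(x):
    ...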

lightllm/models/qwen3next/layer_infer/transformer_layer_infer.py

Lines changed: 2 additions & 2 deletions

@@ -271,14 +271,14 @@ def _linear_attn(
             query_start_loc=infer_state.b1_cu_q_seq_len,
             cache_indices=buffer_idx,
             has_initial_state=infer_state.b_ready_cache_len > 0,
-            conv_states=conv_states.transpose(1, 2),
+            conv_states=conv_states,
             activation=self.activation,
         )
         mixed_qkv = out_tensor.transpose(0, 1)
     else:
         mixed_qkv = causal_conv1d_update(
             mixed_qkv,
-            conv_states.transpose(1, 2),
+            conv_states,
             layer_weight.linear_conv1d.mm_param.weight.transpose(0, 1),
             bias=layer_weight.linear_conv1d.mm_param.bias,
             activation=self.activation,

lightllm/models/qwen3next/model.py

Lines changed: 1 addition & 1 deletion

@@ -92,7 +92,7 @@ def _init_mem_manager(self):
            mtp_layer_num=start_args.mtp_step,
            full_attention_interval=self.config["full_attention_interval"],
            conv_state_dtype=self.data_type,
-           conv_state_shape=(conv_kernel_size - 1 + mtp_step, conv_dim // self.tp_world_size_),
+           conv_state_shape=(conv_dim // self.tp_world_size_, conv_kernel_size - 1 + mtp_step),
            ssm_state_dtype=ssm_dtype_dict[start_args.mamba_ssm_data_type],
            ssm_state_shape=(
                # mtp_step + 1,
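
Taken together with the transformer_layer_infer.py diff above, this swap stores each request's conv state as (channels, width) instead of (width, channels), which is why the two conv_states.transpose(1, 2) calls could be dropped: the cache presumably now sits in the layout the causal-conv1d kernels consume, contiguous rather than a transposed view. A sketch with made-up sizes:

import torch

conv_kernel_size, mtp_step = 4, 0
conv_dim, tp_world_size = 4096, 2
num_bufs = 8  # per-request cache slots; name is made up

# old layout: (bufs, width, channels) -- needed conv_states.transpose(1, 2)
old = torch.zeros(num_bufs, conv_kernel_size - 1 + mtp_step, conv_dim // tp_world_size)
# new layout: (bufs, channels, width) -- passed to causal_conv1d as-is
new = torch.zeros(num_bufs, conv_dim // tp_world_size, conv_kernel_size - 1 + mtp_step)

assert old.transpose(1, 2).shape == new.shape
# Unlike the transposed view of `old`, `new` is contiguous in this layout.
assert new.is_contiguous() and not old.transpose(1, 2).is_contiguous()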
