Commit ce91802

[Iluvatar GPU] Modify the names of some variables (#3273)
1 parent b4fef2c commit ce91802

3 files changed: +16 -13 lines changed

docs/get_started/installation/iluvatar_gpu.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -32,7 +32,7 @@ docker exec -it paddle_infer bash
 ```bash
 pip3 install paddlepaddle==3.1.0a0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
 pip3 install paddle-iluvatar-gpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/
-pip3 install fastdeploy_iluvatar_gpu -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simplels
+pip3 install fastdeploy_iluvatar_gpu==2.1.0.dev0 -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simplels
 ```

 ## Prepare the inference demo script
````
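Since this hunk pins `fastdeploy_iluvatar_gpu` to `2.1.0.dev0`, a quick post-install check can confirm the resolved versions. This is a sketch using only the standard library, not part of the commit; the distribution names are assumed to match the `pip3 install` arguments above, and name normalization may vary across Python versions:

```python
from importlib.metadata import PackageNotFoundError, version

# Assumption: the installed distributions register under the same names
# passed to pip3 in the installation doc above.
pins = {
    "paddlepaddle": "3.1.0a0",
    "paddle-iluvatar-gpu": "3.1.0",
    "fastdeploy_iluvatar_gpu": "2.1.0.dev0",
}
for dist, expected in pins.items():
    try:
        found = version(dist)
        note = "" if found == expected else f" (expected {expected})"
        print(f"{dist}: {found}{note}")
    except PackageNotFoundError:
        print(f"{dist}: not installed")
```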

fastdeploy/model_executor/ops/iluvatar/moe_ops.py

Lines changed: 14 additions & 12 deletions

```diff
@@ -79,25 +79,27 @@ def group_gemm(
 def iluvatar_moe_expert_ffn(
     permute_input: paddle.Tensor,
     tokens_expert_prefix_sum: paddle.Tensor,
-    ffn1_weight: paddle.Tensor,
-    ffn2_weight: paddle.Tensor,
-    ffn1_bias: Optional[paddle.Tensor],
-    ffn1_scale: Optional[paddle.Tensor],
-    ffn2_scale: Optional[paddle.Tensor],
-    ffn2_in_scale: Optional[paddle.Tensor],
+    up_gate_proj_weight: paddle.Tensor,
+    down_proj_weight: paddle.Tensor,
+    up_gate_proj_bias: Optional[paddle.Tensor],
+    up_gate_proj_scale: Optional[paddle.Tensor],
+    down_proj_scale: Optional[paddle.Tensor],
+    down_proj_in_scale: Optional[paddle.Tensor],
     expert_idx_per_token: Optional[paddle.Tensor],
     quant_method: str,
     used_in_ep_low_latency: bool,
 ):
-    assert ffn1_bias is None
-    assert ffn1_scale is not None
-    assert ffn2_scale is not None
-    assert ffn2_in_scale is None
+    assert up_gate_proj_bias is None
+    assert up_gate_proj_scale is not None
+    assert down_proj_scale is not None
+    assert down_proj_in_scale is None
     assert expert_idx_per_token is None
     assert quant_method in ("weight_only_int8")
     assert not used_in_ep_low_latency
     tokens_expert_prefix_sum_cpu = tokens_expert_prefix_sum.to("cpu")
-    ffn1_output = w8a16_group_gemm(permute_input, ffn1_weight, ffn1_scale, tokens_expert_prefix_sum_cpu, -1)
+    ffn1_output = w8a16_group_gemm(
+        permute_input, up_gate_proj_weight, up_gate_proj_scale, tokens_expert_prefix_sum_cpu, -1
+    )
     act_out = swiglu(ffn1_output)
-    output = w8a16_group_gemm(act_out, ffn2_weight, ffn2_scale, tokens_expert_prefix_sum_cpu, -1)
+    output = w8a16_group_gemm(act_out, down_proj_weight, down_proj_scale, tokens_expert_prefix_sum_cpu, -1)
     return output
```
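The rename maps `ffn1_*` (the fused up/gate projection feeding SwiGLU) to `up_gate_proj_*` and `ffn2_*` (the down projection) to `down_proj_*` without changing behavior. As an illustration of the data flow the new names describe, here is a minimal dequantized NumPy sketch; it is not the commit's code, and `moe_expert_ffn_ref`, `swiglu_ref`, and the weight layouts are assumptions. Tokens arrive pre-sorted by expert, each expert's slice goes through its fused up/gate GEMM, a SwiGLU activation, then its down-projection GEMM:

```python
import numpy as np

def swiglu_ref(x: np.ndarray) -> np.ndarray:
    # SwiGLU over a fused tensor: first half gates, second half is the up projection.
    gate, up = np.split(x, 2, axis=-1)
    return gate * (1.0 / (1.0 + np.exp(-gate))) * up

def moe_expert_ffn_ref(
    permute_input: np.ndarray,             # [num_tokens, hidden], rows grouped by expert
    tokens_expert_prefix_sum: np.ndarray,  # [num_experts], cumulative token counts
    up_gate_proj_weight: np.ndarray,       # [num_experts, hidden, 2 * intermediate] (assumed layout)
    down_proj_weight: np.ndarray,          # [num_experts, intermediate, hidden] (assumed layout)
) -> np.ndarray:
    output = np.empty_like(permute_input)
    start = 0
    for e, end in enumerate(tokens_expert_prefix_sum):
        tokens = permute_input[start:end]              # this expert's contiguous slice
        ffn1_output = tokens @ up_gate_proj_weight[e]  # fused up/gate projection
        act_out = swiglu_ref(ffn1_output)
        output[start:end] = act_out @ down_proj_weight[e]  # down projection
        start = end
    return output
```

In the real kernel, these per-expert matmuls are fused into the two `w8a16_group_gemm` calls over int8 weights, with `up_gate_proj_scale` and `down_proj_scale` supplying the dequantization scales.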

fastdeploy/worker/iluvatar_worker.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -88,6 +88,7 @@ def determine_available_memory(self) -> int:
         return int(float(os.getenv("FD_ILUVATAR_KVCACHE_MEM", "3")) * 1024**3)


+# TODO (yuzhe.wu): move it int work_process.py after baidu reconstructs the logic of workproc
 class IluvatarPaddleDisWorkerProc(PaddleDisWorkerProc):
     """
     Paddle Distributed wrapper for fastdeploy.worker.Worker,
```
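For context, the unchanged line in this hunk shows `determine_available_memory` reading `FD_ILUVATAR_KVCACHE_MEM` as a GiB count and converting it to bytes, defaulting to 3 GiB. A standalone sketch of that conversion (the `kvcache_bytes` helper name is hypothetical):

```python
import os

def kvcache_bytes() -> int:
    # Read FD_ILUVATAR_KVCACHE_MEM as a GiB count (default "3") and convert to bytes.
    return int(float(os.getenv("FD_ILUVATAR_KVCACHE_MEM", "3")) * 1024**3)

os.environ["FD_ILUVATAR_KVCACHE_MEM"] = "1.5"
print(kvcache_bytes())  # 1610612736, i.e. 1.5 GiB
```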
