diff --git a/README.md b/README.md
index 822338936..ad5dc9217 100644
--- a/README.md
+++ b/README.md
@@ -84,10 +84,9 @@ Leveraging a multi-role distributed architecture with Ray for flexible resource
 [RAFT++](https://alibaba.github.io/ROLL/docs/English/UserGuide/algorithms/RAFT_Plus_Plus)
 [StarPO](https://alibaba.github.io/ROLL/docs/English/UserGuide/algorithms/agentic_StarPO)

-#### Beckend
+#### Backend
 [DeepSeed](https://alibaba.github.io/ROLL/docs/English/UserGuide/backend/deepspeed)
-[Megatron](https://alibaba.github.io/ROLL/docs/English/UserGuide/backend/megatron)
-[LoRA](https://alibaba.github.io/ROLL/docs/English/UserGuide/backend/lora)
+[Megatron](https://alibaba.github.io/ROLL/docs/English/UserGuide/backend/megatron)
 [vLLM](https://alibaba.github.io/ROLL/docs/English/UserGuide/backend/vllm)
 [SGLang](https://alibaba.github.io/ROLL/docs/English/UserGuide/backend/sglang)

@@ -119,7 +118,7 @@ Leveraging a multi-role distributed architecture with Ray for flexible resource
  * Inference/Generation supports vLLM, SGLang.
  * Training supports DeepSpeed (ZeRO), Megatron-LM 5D parallelism (mcore-adapter, dp/tp/pp/cp/ep), FSDP under implementation.
  * Extreme offload/reload capabilities.
- * Supports LoRA training.
+ * Supports [LoRA](https://alibaba.github.io/ROLL/docs/English/UserGuide/backend/lora) training.
  * Supports FP8 rollout (FP8 inference for LLM as judge, FP8 rollout with BF16 training under development).
  * **AutoDeviceMapping:** Supports custom device mapping for different roles, flexibly managing colocated and disaggregated deployments.
  * **Observability:** Integrated with SwanLab / WandB / TensorBoard, tracking of performance for each domain and reward type.
diff --git a/docs_roll/docs/English/UserGuide/algorithms/GRPO.md b/docs_roll/docs/English/UserGuide/algorithms/GRPO.md
index f5bac5484..75a54bfa5 100644
--- a/docs_roll/docs/English/UserGuide/algorithms/GRPO.md
+++ b/docs_roll/docs/English/UserGuide/algorithms/GRPO.md
@@ -26,7 +26,7 @@ adv_estimator: "grpo"
 ppo_epochs: 1
 use_kl_loss: true
 kl_loss_coef: 0.001
-loss_agg_mode: "seq-mean-token-sum"
+loss_agg_mode: "seq-mean-token-mean"

 # ppo related
 # advantage
diff --git "a/docs_roll/docs/\347\256\200\344\275\223\344\270\255\346\226\207/\344\275\277\347\224\250\346\214\207\345\215\227/algorithms/GRPO.md" "b/docs_roll/docs/\347\256\200\344\275\223\344\270\255\346\226\207/\344\275\277\347\224\250\346\214\207\345\215\227/algorithms/GRPO.md"
index c008ed031..445d56e5b 100644
--- "a/docs_roll/docs/\347\256\200\344\275\223\344\270\255\346\226\207/\344\275\277\347\224\250\346\214\207\345\215\227/algorithms/GRPO.md"
+++ "b/docs_roll/docs/\347\256\200\344\275\223\344\270\255\346\226\207/\344\275\277\347\224\250\346\214\207\345\215\227/algorithms/GRPO.md"
@@ -26,7 +26,7 @@ adv_estimator: "grpo"
 ppo_epochs: 1
 use_kl_loss: true
 kl_loss_coef: 0.001
-loss_agg_mode: "seq-mean-token-sum"
+loss_agg_mode: "seq-mean-token-mean"

 # ppo related
 # advantage
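The substantive change in the GRPO hunks above is switching `loss_agg_mode` from `"seq-mean-token-sum"` to `"seq-mean-token-mean"`. A minimal sketch of how the two modes differ, using plain Python with illustrative names (this is not ROLL's actual implementation): both average over sequences in the batch, but `seq-mean-token-sum` first *sums* the per-token losses within each sequence (so longer responses carry more weight), while `seq-mean-token-mean` first *averages* them (so every response contributes equally regardless of length).

```python
def aggregate_loss(token_loss, mask, mode):
    """Aggregate per-token losses over a batch of variable-length sequences.

    token_loss, mask: nested lists of shape [batch][seq_len];
    mask[i][t] is 1 for valid tokens and 0 for padding.
    Hypothetical helper for illustration only.
    """
    per_seq = []
    for losses, m in zip(token_loss, mask):
        masked_sum = sum(l * v for l, v in zip(losses, m))
        n_valid = sum(m)
        if mode == "seq-mean-token-sum":
            # Sum tokens within a sequence: long sequences dominate.
            per_seq.append(masked_sum)
        elif mode == "seq-mean-token-mean":
            # Mean over tokens within a sequence: length-invariant.
            per_seq.append(masked_sum / max(n_valid, 1))
        else:
            raise ValueError(f"unknown loss_agg_mode: {mode}")
    # Both modes then average over sequences in the batch.
    return sum(per_seq) / len(per_seq)

# Two sequences: lengths 2 and 1 (second token of seq 2 is padding).
token_loss = [[1.0, 1.0], [2.0, 0.0]]
mask = [[1, 1], [1, 0]]
print(aggregate_loss(token_loss, mask, "seq-mean-token-sum"))   # 2.0
print(aggregate_loss(token_loss, mask, "seq-mean-token-mean"))  # 1.5
```

With `token-sum`, the two-token sequence contributes its full summed loss (2.0) just like the one-token sequence, so per-token gradients are effectively up-weighted for longer responses; `token-mean` removes that length bias, which matches the per-sequence normalization in the original GRPO objective.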