diff --git a/.gitignore b/.gitignore index 1ee937f0cffa..5f29895be914 100644 --- a/.gitignore +++ b/.gitignore @@ -140,4 +140,4 @@ autogen/ #fp8 ops/csrc/fp8/deep_gemm/include/cutlass ops/csrc/fp8/deep_gemm/include/cute -.ccls-cache +.ccls-cache \ No newline at end of file diff --git a/slm/pipelines/examples/contrastive_training/README.md b/slm/pipelines/examples/contrastive_training/README.md index d3148786ec06..c33ed2941b6e 100644 --- a/slm/pipelines/examples/contrastive_training/README.md +++ b/slm/pipelines/examples/contrastive_training/README.md @@ -1,17 +1,23 @@ # 向量检索模型训练 -推荐安装 gpu 版本的[PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/conda/linux-conda.html),以 cuda12.3的 paddle 为例,安装命令如下: +推荐安装 gpu 版本的[PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/conda/linux-conda.html),以 cuda11.8的 paddle 为例,安装命令如下: ``` -conda install nccl -c conda-forge -conda install paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/ -c conda-forge -``` -安装其他依赖: -``` -pip install git+https://github.com/PaddlePaddle/PaddleNLP.git@develop -pip install -r requirements.txt +# 创建一个名为 paddle_env 的新环境,并激活 +conda create --name paddle_env python=3.10 +conda activate paddle_env + +# 安装 paddlenlp develop版本 +pip install --pre --upgrade paddlenlp -f https://www.paddlepaddle.org.cn/whl/paddlenlp.html + +# 安装 paddlepaddle-gpu nightly版本 +pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ + +#安装其他依赖: +pip install -r slm/pipelines/examples/contrastive_training/requirements.txt ``` + 下载 DuReader-Retrieval 中文数据集: ``` cd data @@ -45,7 +51,7 @@ python train.py --do_train \ python -m paddle.distributed.launch --gpus "0,1,2,3" train.py --do_train \ --model_name_or_path rocketqa-zh-base-query-encoder \ --output_dir rocketqa-zh-base-query-encoder-duretrieval \ - --train_data ./data/dual.train.json \ + --train_data ./data/dureader_dual.train.jsonl \ --overwrite_output_dir \ --fine_tune_type sft \ --sentence_pooling_method cls \ @@ -132,7 +138,7 @@ python evaluation/benchmarks.py --model_type bert \ --passage_max_length 512 \ ``` 可配置参数包括: -- `model_type`: 模型的类似,可选 bert 或 roberta 等等 +- `model_type`: 模型的类型,可选 bert 或 roberta 等等 - `query_model`: query 向量模型的路径 - `passage_model`: passage 向量模型的路径 - `query_max_length`: query 的最大长度 @@ -179,7 +185,7 @@ python -u evaluation/eval_mteb.py \ - `add_bos_token`:是否添加起始符,0表示不添加,1表示添加 - `add_eos_token`:是否添加结束符,0表示不添加,1表示添加 -# MTEB 评估 +## MTEB 评估 [MTEB](https://github.com/embeddings-benchmark/mteb) 是一个大规模文本嵌入评测基准,包含了丰富的向量检索评估任务和数据集。 本仓库主要面向其中的英文检索任务(Retrieval),并额外支持针对 MSMARCO-Title 的评估。 @@ -261,6 +267,39 @@ MTEB-Retrieval 数据集, NDCG@10分数: | LLARA-passage | 52.48 | 47.51 | 26.13 | 37.26 | 44.12 | 81.09 | 43.98 | 69.17 | 45.49 | 37.07 | 61.76 | 82.29 | 17.30 | 76.07 | 36.73 | 81.30 | +## 压缩 + +### 模型删层 +模型剪枝脚本 `shortgpt_prune.py`,用于评估并移除大语言模型中重要性较低的层,以生成一个更小、更高效的模型。该脚本采用“块影响”度量来计算层的重要性,并直接在内存中完成剪枝和保存,流程高效。 + +#### 使用方法 + +通过以下命令执行剪枝脚本。可指定原始模型、输出路径、要剪枝的层数以及模型中transformer层的路径。 + +以repllama-v1-7b-lora-passage为例: +```bash +python shortgpt_prune.py \ + --model_name_or_path castorini/repllama-v1-7b-lora-passage \ + --output_model_path ./pruned-repllama-v1-7b-lora-passage \ + --n_prune_layers 6 \ + --layers_path "llama.layers" +``` + +以NV-Embed-v1为例: +```bash +python shortgpt_prune.py \ + --model_name_or_path nvidia/NV-Embed-v1 \ + --output_model_path /pruned-NV-Embed-v1_pruned_26 \ + --n_prune_layers 6 \ + --layers_path "layers" +``` +可配置参数包括: +- 
`--model_name_or_path`: 原始模型的名称或本地路径。 +- `--output_model_path`: 剪枝后模型的保存路径。 +- `--n_prune_layers`: 希望移除的层数。脚本会自动找出最不重要的N层。 +- `--layers_path`: 模型对象中指向transformer层列表的点分隔路径(例如repllama为`"llama.layers"`, llama为`"model.layers"`)。 + +可用output_model_path路径中的模型跑评估[评估部分的代码](#评估) ## Reference @@ -281,3 +320,5 @@ MTEB-Retrieval 数据集, NDCG@10分数: [8] Yingqi Qu, Yuchen Ding, Jing Liu, Kai Liu, Ruiyang Ren, Wayne Xin Zhao, Daxiang Dong, Hua Wu, Haifeng Wang: RocketQA: An Optimized Training Approach to Dense Passage Retrieval for Open-Domain Question Answering. NAACL 2021 [9] Ruiyang Ren, Yingqi Qu, Jing Liu, Wayne Xin Zhao, Qiaoqiao She, Hua Wu, Haifeng Wang, Ji-Rong Wen: RocketQAv2: A Joint Training Method for Dense Passage Retrieval and Passage Re-ranking. EMNLP 2021 + +[10] Xin Men, Mingyu Xu, Qingyu Zhang, Bingning Wang, Hongyu Lin, Yaojie Lu, Xianpei Han, Weipeng Chen: Shortgpt: Layers in large language models are more redundant than you expect. ACL Findings 2025 \ No newline at end of file diff --git a/slm/pipelines/examples/contrastive_training/evaluation/benchmarks.py b/slm/pipelines/examples/contrastive_training/evaluation/benchmarks.py index 62f00a11570f..327604e3f87f 100644 --- a/slm/pipelines/examples/contrastive_training/evaluation/benchmarks.py +++ b/slm/pipelines/examples/contrastive_training/evaluation/benchmarks.py @@ -21,7 +21,7 @@ from datasets import load_dataset from mteb.abstasks import AbsTaskRetrieval -from prediction import Eval_modle +from prediction import Eval_model csv.field_size_limit(500 * 1024 * 1024) @@ -51,7 +51,7 @@ def __init__( pooling_mode="mean_tokens", **kwargs, ): - self.query_model = Eval_modle( + self.query_model = Eval_model( model=query_model, max_seq_len=max_seq_len, batch_size=batch_size, diff --git a/slm/pipelines/examples/contrastive_training/evaluation/eval_mteb.py b/slm/pipelines/examples/contrastive_training/evaluation/eval_mteb.py index eb676d23f8f2..5813a1117880 100644 --- a/slm/pipelines/examples/contrastive_training/evaluation/eval_mteb.py +++ b/slm/pipelines/examples/contrastive_training/evaluation/eval_mteb.py @@ -29,7 +29,7 @@ class MSMARCOTITLE(AbsTaskRetrieval): metadata = TaskMetadata( dataset={ - "corpus_path": "Tevatron/msmarco-passage-corpus", + "corpus_path": "Tevatron/msmarco-passage-corpus-new", "path": "mteb/msmarco", "revision": "c5a29a104738b98a9e76336939199e264163d4a0", }, @@ -53,6 +53,9 @@ class MSMARCOTITLE(AbsTaskRetrieval): bibtex_citation=None, n_samples=None, avg_character_length=None, + modalities=["text"], + sample_creation="created", + descriptive_stats={}, ) def load_data(self, **kwargs): diff --git a/slm/pipelines/examples/contrastive_training/evaluation/eval_mteb.sh b/slm/pipelines/examples/contrastive_training/evaluation/eval_mteb.sh index 262496a3f122..52e146381bf7 100644 --- a/slm/pipelines/examples/contrastive_training/evaluation/eval_mteb.sh +++ b/slm/pipelines/examples/contrastive_training/evaluation/eval_mteb.sh @@ -12,18 +12,43 @@ # See the License for the specific language governing permissions and # limitations under the License. 
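+# Evaluates the embedding models listed in MODELS_TO_RUN on the MTEB retrieval
+# tasks listed in TASKS, calling evaluation/eval_mteb.py once per (model, task)
+# pair. Assumed to be launched from the contrastive_training directory, since
+# evaluation/eval_mteb.py is referenced by a relative path, e.g.:
+#   bash evaluation/eval_mteb.sh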
+#!/bin/bash -for task in "ArguAna" "ClimateFEVER" "DBPedia" "FEVER" "FiQA2018" "HotpotQA" "MSMARCO" "NFCorpus" "NQ" "QuoraRetrieval" "SCIDOCS" "SciFact" "Touche2020" "TRECCOVID" "CQADupstackAndroidRetrieval" "CQADupstackEnglishRetrieval" "CQADupstackGamingRetrieval" "CQADupstackGisRetrieval" "CQADupstackMathematicaRetrieval" "CQADupstackPhysicsRetrieval" "CQADupstackProgrammersRetrieval" "CQADupstackStatsRetrieval" "CQADupstackTexRetrieval" "CQADupstackUnixRetrieval" "CQADupstackWebmastersRetrieval" "CQADupstackWordpressRetrieval" "MSMARCOTITLE" -do +# --- Script Configuration --- +# Exit immediately if a command exits with a non-zero status. +set -e - # 1. RocketQA V1 - python3.10 -u eval_mteb.py \ - --corpus_model_name_or_path rocketqa-en-base-v1/passage_model \ - --query_model_name_or_path rocketqa-en-base-v1/query_model \ +# Define the list of all tasks (datasets) to be evaluated. +# TASKS=( +# "ArguAna" "ClimateFEVER" "DBPedia" "FEVER" "FiQA2018" "HotpotQA" "MSMARCO" "NFCorpus" "NQ" "QuoraRetrieval" +# "SCIDOCS" "SciFact" "Touche2020" "TRECCOVID" "CQADupstackAndroidRetrieval" "CQADupstackEnglishRetrieval" +# "CQADupstackGamingRetrieval" "CQADupstackGisRetrieval" "CQADupstackMathematicaRetrieval" "CQADupstackPhysicsRetrieval" +# "CQADupstackProgrammersRetrieval" "CQADupstackStatsRetrieval" "CQADupstackTexRetrieval" "CQADupstackUnixRetrieval" +# "CQADupstackWebmastersRetrieval" "CQADupstackWordpressRetrieval" "MSMARCOTITLE" +# ) + +TASKS=("ArguAna" "SCIDOCS" "FEVER") + + +# You can uncomment the models you wish to evaluate. +# MODELS_TO_RUN=("RocketQA-V1" "RocketQA-V2" "BGE" "RepLLaMA" "NV-Embed-v1" "BGE-EN-ICL" "LLARA-passage") +MODELS_TO_RUN=("BGE") + + +# =================================================================================== +# 🚀 1. RocketQA V1 +# =================================================================================== +if [[ " ${MODELS_TO_RUN[*]} " =~ " RocketQA-V1 " ]]; then + echo "===== Running Evaluation for Model: RocketQA V1 =====" + for task in "${TASKS[@]}"; do + echo "--- Task: $task ---" + python3.10 -u evaluation/eval_mteb.py \ + --corpus_model_name_or_path rocketqa-v1-marco-para-encoder \ + --query_model_name_or_path rocketqa-v1-marco-query-encoder \ --model_flag RocketQA-V1 \ --output_folder en_results/rocketqa-en-base-v1 \ --task_name "$task" \ - --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \ + --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \ --query_instruction "" \ --document_instruction "" \ --max_seq_length 512 \ @@ -31,15 +56,24 @@ do --dtype "float32" \ --padding_side right \ --pooling_method "cls" + done +fi + - # 2. RocketQA V2 - python3.10 -u eval_mteb.py \ - --corpus_model_name_or_path rocketqa-en-base-v2/passage_model \ - --query_model_name_or_path rocketqa-en-base-v2/query_model \ +# =================================================================================== +# 🚀 2. 
RocketQA V2 +# =================================================================================== +if [[ " ${MODELS_TO_RUN[*]} " =~ " RocketQA-V2 " ]]; then + echo "===== Running Evaluation for Model: RocketQA V2 =====" + for task in "${TASKS[@]}"; do + echo "--- Task: $task ---" + python3.10 -u evaluation/eval_mteb.py \ + --corpus_model_name_or_path rocketqav2-en-marco-para-encoder \ + --query_model_name_or_path rocketqav2-en-marco-query-encoder \ --model_flag RocketQA-V2 \ --output_folder en_results/rocketqa-en-base-v2 \ --task_name "$task" \ - --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \ + --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \ --query_instruction "" \ --document_instruction "" \ --max_seq_length 512 \ @@ -47,27 +81,45 @@ do --dtype "float32" \ --padding_side right \ --pooling_method "cls" + done +fi + - # 3. BGE - python3.10 eval_mteb.py \ +# =================================================================================== +# 🎯 3. BGE (BAAI/bge-large-en-v1.5) +# =================================================================================== +if [[ " ${MODELS_TO_RUN[*]} " =~ " BGE " ]]; then + echo "===== Running Evaluation for Model: BGE (bge-large-en-v1.5) =====" + for task in "${TASKS[@]}"; do + echo "--- Task: $task ---" + python3.10 evaluation/eval_mteb.py \ --base_model_name_or_path BAAI/bge-large-en-v1.5 \ - --output_folder en_results/bge-large-en-v1.5 \ + --output_folder en_results/bge-large-en-v1.5_2 \ --task_name "$task" \ - --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \ + --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \ --document_instruction 'Represent this sentence for searching relevant passages: ' \ --pooling_method mean \ --max_seq_length 512 \ --eval_batch_size 32 \ --padding_side right \ --add_bos_token 0 \ - --add_eos_token 0 + --add_eos_token 0 + done +fi - # 4. RepLLaMA - python3.10 eval_mteb.py \ + +# =================================================================================== +# 🦙 4. RepLLaMA +# =================================================================================== +if [[ " ${MODELS_TO_RUN[*]} " =~ " RepLLaMA " ]]; then + echo "===== Running Evaluation for Model: RepLLaMA =====" + for task in "${TASKS[@]}"; do + echo "--- Task: $task ---" + python3.10 evaluation/eval_mteb.py \ --base_model_name_or_path castorini/repllama-v1-7b-lora-passage \ --output_folder en_results/repllama-v1-7b-lora-passage \ --task_name "$task" \ - --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \ + --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \ --query_instruction 'query: ' \ --document_instruction 'passage: ' \ --pooling_method last \ @@ -76,22 +128,40 @@ do --padding_side right \ --add_bos_token 0 \ --add_eos_token 1 + done +fi + - # 5. NV-Embed-v1 - python3.10 eval_mteb.py \ +# =================================================================================== +# Nvidia 5. 
NV-Embed-v1 +# =================================================================================== +if [[ " ${MODELS_TO_RUN[*]} " =~ " NV-Embed-v1 " ]]; then + echo "===== Running Evaluation for Model: NV-Embed-v1 =====" + for task in "${TASKS[@]}"; do + echo "--- Task: $task ---" + python3.10 evaluation/eval_mteb.py \ --base_model_name_or_path nvidia/NV-Embed-v1 \ --output_folder en_results/nv-embed-v1 \ --query_instruction "Given a claim, find documents that refute the claim" \ --task_name "$task" \ - --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \ + --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \ --eval_batch_size 8 + done +fi + - # 6. BGE-EN-ICL - python3.10 eval_mteb.py \ +# =================================================================================== +# 🎯 6. BGE-EN-ICL +# =================================================================================== +if [[ " ${MODELS_TO_RUN[*]} " =~ " BGE-EN-ICL " ]]; then + echo "===== Running Evaluation for Model: BGE-EN-ICL =====" + for task in "${TASKS[@]}"; do + echo "--- Task: $task ---" + python3.10 evaluation/eval_mteb.py \ --base_model_name_or_path BAAI/bge-en-icl \ --output_folder en_results/bge-en-icl \ --task_name "$task" \ - --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \ + --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \ --query_instruction $' Given a scientific claim, retrieve documents that support or refute the claim.\n' \ --max_seq_length 512 \ --eval_batch_size 32 \ @@ -99,18 +169,31 @@ do --padding_side left \ --add_bos_token 1 \ --add_eos_token 1 + done +fi - # 7. LLARA-passage - python3.10 eval_mteb.py \ + +# =================================================================================== +# 🦙 7. LLARA-passage +# =================================================================================== +if [[ " ${MODELS_TO_RUN[*]} " =~ " LLARA-passage " ]]; then + echo "===== Running Evaluation for Model: LLARA-passage =====" + for task in "${TASKS[@]}"; do + echo "--- Task: $task ---" + python3.10 evaluation/eval_mteb.py \ --base_model_name_or_path BAAI/LLARA-passage \ --output_folder en_results/llara-passage \ --task_name "$task" \ - --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \ + --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \ --eval_batch_size 8 \ --pooling_method last_8 \ --model_flag llara \ --add_bos_token 1 \ --add_eos_token 0 \ --max_seq_length 532 + done +fi + + -done \ No newline at end of file +echo "All specified evaluations are complete." \ No newline at end of file diff --git a/slm/pipelines/examples/contrastive_training/evaluation/prediction.py b/slm/pipelines/examples/contrastive_training/evaluation/prediction.py index 9cbbfb7edc3d..8311a2efafdd 100644 --- a/slm/pipelines/examples/contrastive_training/evaluation/prediction.py +++ b/slm/pipelines/examples/contrastive_training/evaluation/prediction.py @@ -11,20 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
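+# Eval_model wraps paddlenlp.transformers.BiEncoderModel and is used by
+# evaluation/benchmarks.py to batch-encode queries and passages into embeddings.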
-import os -import sys - import numpy as np import paddle from paddlenlp.data import DataCollatorWithPadding -from paddlenlp.transformers import AutoTokenizer - -sys.path.append(os.path.abspath(".")) -from models.modeling import BiEncoderModel +from paddlenlp.transformers import AutoTokenizer, BiEncoderModel -class Eval_modle: +class Eval_model: def __init__( self, model: str = None, @@ -45,10 +39,8 @@ def _construct_model(self): Construct the inference model for the predictor. """ if self.model_type in ["bert", "roberta", "ernie"]: - self._model = BiEncoderModel.from_pretrained( - model_name_or_path=self.model, - normalized=True, - sentence_pooling_method="cls", + self._model = BiEncoderModel( + model_name_or_path=self.model, normalized=True, sentence_pooling_method="cls", dtype="float32" ) print(f"loading checkpoints {self.model}") else: @@ -138,7 +130,7 @@ def _run_model(self, inputs, **kwargs): with paddle.no_grad(): for batch_inputs in inputs["batches"]: batch_inputs = self._collator(batch_inputs) - token_embeddings = self._model.encode(batch_inputs) + token_embeddings = self._model.encode(batch_inputs, self._model.model) all_feats.append(token_embeddings.detach().cpu().numpy()) return all_feats diff --git a/slm/pipelines/examples/contrastive_training/requirements.txt b/slm/pipelines/examples/contrastive_training/requirements.txt index 67d35be1ac5b..af6ff0e518f2 100644 --- a/slm/pipelines/examples/contrastive_training/requirements.txt +++ b/slm/pipelines/examples/contrastive_training/requirements.txt @@ -1,2 +1,4 @@ mteb==1.12.80 -typer==0.9.0 \ No newline at end of file +typer==0.9.0 +h11==0.16.0 +requests[socks] \ No newline at end of file diff --git a/slm/pipelines/examples/contrastive_training/shortgpt_prune.py b/slm/pipelines/examples/contrastive_training/shortgpt_prune.py new file mode 100644 index 000000000000..3ad30180688d --- /dev/null +++ b/slm/pipelines/examples/contrastive_training/shortgpt_prune.py @@ -0,0 +1,355 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import json +import math +import os +import re +from collections import OrderedDict +from typing import List + +import paddle +from datasets import load_dataset +from paddle.io import DataLoader +from tqdm import tqdm + +from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer, NVEncodeModel + + +# ===================================================================================== +# 1. block_influence +# ===================================================================================== +def block_influence( + input_hidden_state: paddle.Tensor, + output_hidden_state: paddle.Tensor, + angular: bool = False, +) -> paddle.Tensor: + """ + Calculates block influence between input and output hidden states. 
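+
+    For every token position, the cosine similarity between the layer's input and
+    output hidden state is computed; the influence is 1 - cosine similarity, or
+    arccos(sim) / pi when ``angular`` is True. A low score means the layer changes
+    its input very little, which marks it as a candidate for pruning.
+
+    Args:
+        input_hidden_state: Hidden states entering the block, shape [batch, seq_len, hidden_dim].
+        output_hidden_state: Hidden states produced by the block, same shape.
+        angular: If True, return the angular distance instead of 1 - cosine similarity.
+
+    Returns:
+        A 1-D tensor with one influence value per token position.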
+ """ + _, _, d = input_hidden_state.shape + input_hidden_state = paddle.reshape(input_hidden_state, [-1, d]) + output_hidden_state = paddle.reshape(output_hidden_state, [-1, d]) + + norm_input = paddle.norm(input_hidden_state, p=2, axis=-1, keepdim=True) + norm_output = paddle.norm(output_hidden_state, p=2, axis=-1, keepdim=True) + + sim = paddle.matmul(input_hidden_state, output_hidden_state, transpose_y=True) / (norm_input * norm_output) + sim = paddle.diag(sim).astype("float32").nan_to_num(nan=0.5) + + if angular: + return paddle.acos(sim) / math.pi + return 1 - sim + + +# ===================================================================================== +# 2. ShortGPT +# ===================================================================================== +class ShortGPT: + """ + A class to evaluate layer importance in LLMs using PaddlePaddle. + """ + + def __init__(self, model_name: str, layers_path: str): + print(f"Loading tokenizer for '{model_name}'...") + self.tokenizer = AutoTokenizer.from_pretrained(model_name) + self.tokenizer.pad_token = self.tokenizer.eos_token + + print(f"Loading model '{model_name}' with PaddlePaddle backend...") + if "NV-Embed" in model_name: + self.model = NVEncodeModel.from_pretrained( + model_name, tokenizer_path=model_name, query_instruction="", document_instruction="" + ) + else: + self.model = AutoModelForCausalLM.from_pretrained(model_name, dtype=paddle.float16) + + self.model.eval() + print("Model loaded successfully for importance evaluation.") + + try: + path_parts = layers_path.split(".") # e.g., 'llama.layers' -> ['llama', 'layers'] + + self.base_model_for_call = self.model + # 遍历路径中除了最后 'layers' 之外的部分 (e.g., 'llama') + for part in path_parts[:-1]: + self.base_model_for_call = getattr(self.base_model_for_call, part) + + # 从基础模型中获取 'layers' 列表 + self.layers = getattr(self.base_model_for_call, path_parts[-1]) + print(f"Successfully located base model for evaluation call: {type(self.base_model_for_call)}") + print(f"Successfully located {len(self.layers)} layers.") + + except AttributeError: + raise AttributeError(f"Could not find layers at path '{layers_path}' in the model architecture.") + + self.importances = [0.0 for _ in self.layers] + + def compute_bi(self, hiddens: List[paddle.Tensor]): + """ + Computes and accumulates block influence scores from hidden states. + """ + n = 1 + for i in range(len(hiddens) - n): + layer_index = i + if layer_index < len(self.importances): + in_hidden = hiddens[i] + out_hidden = hiddens[i + n] + self.importances[layer_index] += block_influence(in_hidden, out_hidden).sum().item() + + @paddle.no_grad() + def eval_importance(self, prompts: List[str], model_name: str, stride: int = 256): + """ + Evaluates the importance of model layers on given prompts. 
+ """ + prompt_tokens = self.tokenizer(prompts, padding=True, return_attention_mask=True, return_tensors="pd") + input_ids = prompt_tokens.input_ids + attn_mask = prompt_tokens.attention_mask + + max_prompt_len = max(len(t) for t in input_ids) + + for start in range(0, max_prompt_len, stride): + seq_ids = (attn_mask.sum(axis=-1) > start).nonzero().squeeze() + seq_ids = seq_ids.unsqueeze(0) if seq_ids.ndim == 0 else seq_ids + + if seq_ids.shape[0] == 0: + continue + + inputs = input_ids[seq_ids, start : start + stride] + attn = attn_mask[seq_ids, start : start + stride] + + if "NV-Embed" in model_name: + outputs = self.base_model_for_call.m_forward( + input_ids=inputs, attention_mask=attn, output_hidden_states=True, return_dict=True + ) + else: + outputs = self.base_model_for_call( + input_ids=inputs, attention_mask=attn, output_hidden_states=True, return_dict=True + ) + + if outputs.hidden_states: + self.compute_bi(outputs.hidden_states) + + +def load_model_weights(model_folder_path: str) -> OrderedDict: + print(f"Attempting to load model weights from FOLDER: '{model_folder_path}'...") + + # 1. Ensure the path is a valid directory + if not os.path.isdir(model_folder_path): + raise NotADirectoryError(f"The provided path is not a valid directory: '{model_folder_path}'") + + state_dict = OrderedDict() + index_path = os.path.join(model_folder_path, "model_state.pdparams.index.json") + + # 2. Check for the presence of a sharded index file + if os.path.isfile(index_path): + # Case A: Sharded model format detected (index file found) + print("Sharded model format detected (index file found).") + with open(index_path, "r", encoding="utf-8") as f: + index_data = json.load(f) + + shard_files = sorted(list(set(index_data["weight_map"].values()))) + print(f"Found {len(shard_files)} shard(s).") + + for shard_file in shard_files: + shard_path = os.path.join(model_folder_path, shard_file) + if not os.path.exists(shard_path): + raise FileNotFoundError(f"Shard file '{shard_file}' listed in index not found at '{shard_path}'") + + print(f" > Loading shard: {shard_file}") + shard_state_dict = paddle.load(shard_path, return_numpy=True) + state_dict.update(shard_state_dict) + del shard_state_dict + print("All weight shards loaded successfully.") + + else: + # Case B: No index file found; look for a single .pdparams file + print("No index file found. Searching for a single .pdparams file inside the folder...") + pdparams_files = [f for f in os.listdir(model_folder_path) if f.endswith(".pdparams")] + + if len(pdparams_files) == 1: + # Found exactly one .pdparams file + single_file_path = os.path.join(model_folder_path, pdparams_files[0]) + print(f" > Loading single parameters file: {pdparams_files[0]}") + state_dict = paddle.load(single_file_path, return_numpy=True) + print("Single weight file loaded successfully.") + elif len(pdparams_files) > 1: + raise ValueError( + f"Ambiguous model files. Multiple .pdparams files found in '{model_folder_path}' " + "but no 'model_state.pdparams.index.json' to specify order." + ) + else: # len(pdparams_files) == 0 + raise FileNotFoundError(f"No .pdparams files found in the directory '{model_folder_path}'.") + + return state_dict + + +# ===================================================================================== +# 3. 
Prune and Save +# ===================================================================================== +def prune_and_save_model_in_memory( + model, + tokenizer, + new_model_path, + layers_to_delete, + layers_path_str, +): + """ + Prunes and saves a model directly from the in-memory model object. + """ + print("=" * 50) + print("PART 2: Starting In-Memory Model Pruning and Saving") + print("=" * 50) + os.makedirs(new_model_path, exist_ok=True) + + # Step 1: Get state_dict directly from the in-memory model + print("Getting state_dict directly from the in-memory model...") + state_dict = model.state_dict() + + # Step 2: Iterate, filter, and rename weights + print("Processing weights: removing specified layers and re-indexing...") + escaped_layers_path = layers_path_str.replace(".", r"\.") + layer_pattern = re.compile(rf"^{escaped_layers_path}\.(\d+)\.") + new_state_dict = OrderedDict() + + for key, value in state_dict.items(): + match = layer_pattern.match(key) + if not match: + new_state_dict[key] = value + continue + + layer_idx = int(match.group(1)) + if layer_idx in layers_to_delete: + continue + + num_layers_deleted_before = sum(1 for deleted_idx in layers_to_delete if deleted_idx < layer_idx) + new_layer_idx = layer_idx - num_layers_deleted_before + old_prefix = f"{layers_path_str}.{layer_idx}." + new_prefix = f"{layers_path_str}.{new_layer_idx}." + new_key = key.replace(old_prefix, new_prefix, 1) + new_state_dict[new_key] = value + + print(f"Processing complete. Removed {len(layers_to_delete)} layer(s): {sorted(list(layers_to_delete))}.") + + # Step 3: Get and modify the configuration from the model object + print("Updating configuration file...") + config = model.config.to_dict() + + # Fix: Convert non-serializable paddle data types to strings + for key, value in config.items(): + if type(value).__name__ == "DataType": + config[key] = str(value).split(".")[-1] + + if "num_hidden_layers" in config: + original_num_layers = config["num_hidden_layers"] + new_num_layers = original_num_layers - len(layers_to_delete) + config["num_hidden_layers"] = new_num_layers + print(f" - Number of layers changed from {original_num_layers} to {new_num_layers}.") + + new_config_path = os.path.join(new_model_path, "config.json") + with open(new_config_path, "w", encoding="utf-8") as f: + json.dump(config, f, indent=2) + print(f"New config saved to '{new_config_path}'.") + + # Step 4: Save the new weights and tokenizer + print("Saving pruned weights...") + new_weights_path = os.path.join(new_model_path, "model_state.pdparams") + paddle.save(new_state_dict, new_weights_path) + print(f"Pruned weights saved to '{new_weights_path}'.") + + print("Saving tokenizer files...") + tokenizer.save_pretrained(new_model_path) + print(f"Tokenizer files saved to '{new_model_path}'.") + + print("\n🎉 Pruning process completed successfully!") + print(f"Pruned model has been saved to '{new_model_path}'") + + +def main(): + parser = argparse.ArgumentParser( + description="Calculate layer importance, prune, and save a new PaddlePaddle model." + ) + parser.add_argument( + "--model_name_or_path", + type=str, + required=True, + help="Path or HuggingFace name of the source PaddlePaddle model.", + ) + parser.add_argument( + "--output_model_path", type=str, required=True, help="Path to save the new, pruned model directory." + ) + parser.add_argument( + "--layers_path", type=str, required=True, help="Dot-separated path to the layers list (e.g., 'llama.layers')." 
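+        # e.g. "llama.layers" for repllama, "model.layers" for llama (see README)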
+ ) + parser.add_argument( + "--n_prune_layers", type=int, required=True, help="The number of layers to identify and prune." + ) + parser.add_argument( + "--dataset_name", + type=str, + default="emozilla/pg19", + help="Name of the Hugging Face dataset for calibration. Default: 'emozilla/pg19'.", + ) + parser.add_argument( + "--dataset_split", + type=str, + default="validation", + help="The split of the dataset to use. Default: 'validation'.", + ) + args = parser.parse_args() + + # --- PART 1: Calculate Layer Importance --- + print("=" * 50) + print("PART 1: Calculating Layer Importance") + print("=" * 50) + print(f"Loading '{args.dataset_split}' split from '{args.dataset_name}' dataset for calibration...") + try: + data = load_dataset(args.dataset_name, split=args.dataset_split) + except Exception as e: + print(f"Failed to load dataset. Error: {e}") + print( + "Please ensure the dataset name and split are correct and you have internet access for Hugging Face datasets." + ) + return + + dataloader = DataLoader(data, batch_size=1, shuffle=False) + + short_model = ShortGPT(model_name=args.model_name_or_path, layers_path=args.layers_path) + + for batch in tqdm(dataloader, desc="Evaluating Layer Importance"): + if "text" not in batch: + raise ValueError("Dataset must contain a 'text' column.") + prompts = batch["text"] + short_model.eval_importance(prompts=prompts, model_name=args.model_name_or_path, stride=256) + + prune_order = sorted(range(len(short_model.importances)), key=lambda i: short_model.importances[i]) + layers_to_delete = set(prune_order[: args.n_prune_layers]) + + print("\n--- Importance Calculation Complete ---") + print(f"Calculated importances: {[f'{v:.2f}' for v in short_model.importances]}") + print(f"Pruning order (least to most important): {prune_order}") + print(f"Will delete the {args.n_prune_layers} least important layers: {sorted(list(layers_to_delete))}") + + # --- PART 2: Perform In-Memory Pruning and Saving --- + prune_and_save_model_in_memory( + model=short_model.model, + tokenizer=short_model.tokenizer, + new_model_path=args.output_model_path, + layers_to_delete=layers_to_delete, + layers_path_str=args.layers_path, + ) + + +if __name__ == "__main__": + main() diff --git a/slm/pipelines/examples/contrastive_training/train.py b/slm/pipelines/examples/contrastive_training/train.py index c18263dff95d..40ada0c70a07 100644 --- a/slm/pipelines/examples/contrastive_training/train.py +++ b/slm/pipelines/examples/contrastive_training/train.py @@ -109,6 +109,7 @@ def main(): use_inbatch_neg=training_args.use_inbatch_neg, matryoshka_dims=training_args.matryoshka_dims if training_args.use_matryoshka else None, matryoshka_loss_weights=training_args.matryoshka_loss_weights if training_args.use_matryoshka else None, + dtype=dtype, ) if training_args.fix_position_embedding: @@ -129,13 +130,13 @@ def main(): if training_args.fine_tune_type == "lora": if any([x in model_args.model_name_or_path for x in ["llama", "baichuan", "NV-Embed"]]): target_modules = [ - ".*q_proj.*", - ".*k_proj.*", - ".*v_proj.*", - ".*o_proj.*", - ".*down_proj.*", - ".*up_proj.*", - ".*gate_proj.*", + ".*q_proj$", + ".*k_proj$", + ".*v_proj$", + ".*o_proj$", + ".*down_proj$", + ".*up_proj$", + ".*gate_proj$", ] else: raise ValueError("need to specify the target modules for LoRA fine-tuning.")