diff --git a/.ci/benchmark.py b/.ci/benchmark.py
new file mode 100644
index 0000000..ccd0268
--- /dev/null
+++ b/.ci/benchmark.py
@@ -0,0 +1,31 @@
+"""Write the TorchBench accuracy report to the GitHub Actions job summary.
+
+Usage: python .ci/benchmark.py <benchmark_output.json>
+"""
+import os
+import pathlib
+import sys
+
+# "python .ci/benchmark.py" puts only the .ci directory on sys.path, so add
+# the repository root to make the "src" package importable.
+sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent.parent))
+
+from src.benchmark.utils import read_metrics, to_markdown_table  # noqa: E402
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        sys.exit(f"usage: {sys.argv[0]} <benchmark_output.json>")
+
+    # Generate statistics report
+    statistics_path = sys.argv[1]
+    metrics = read_metrics(statistics_path, metric="accuracy")
+    markdown_table = to_markdown_table(metrics)
+
+    # Write to workflow job summary
+    summary_path = os.environ["GITHUB_STEP_SUMMARY"]
+    with open(summary_path, "a", encoding="utf-8") as f:
+        f.write("## Torchbenchmark statistics report\n")
+        f.write(markdown_table)
+        # End the table with a newline so later summary content starts on
+        # its own line.
+        f.write("\n")
diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml
new file mode 100644
index 0000000..0449354
--- /dev/null
+++ b/.github/workflows/_ascend_npu_benchmark.yml
@@ -0,0 +1,146 @@
+name: '_ascend_npu_benchmark'
+
+on:
+  workflow_call:
+    inputs:
+      runner:
+        required: true
+        type: string
+        description: 'The runner selected to run on'
+      image:
+        required: true
+        type: string
+        description: 'The docker image which will be loaded'
+      device:
+        required: true
+        type: string
+        description: 'The device selected to run on'
+      artifact_name:
+        required: true
+        type: string
+        description: 'The torch_npu distribution artifact name'
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+jobs:
+  test:
+    name: run benchmarks for torch_npu
+    runs-on: ${{ inputs.runner }}
+    container:
+      image: ${{ inputs.image }}
+      # Expose the host's Ascend driver files and npu-smi to the container
+      volumes:
+        - /usr/local/dcmi:/usr/local/dcmi
+        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
+        - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
+        - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
+        - /etc/ascend_install.info:/etc/ascend_install.info
+      options: >-
+        --network host
+        --device ${{ inputs.device }}
+        --device /dev/davinci_manager
+        --device /dev/devmm_svm
+        --device /dev/hisi_hdc
+      env:
+        HTTP_PROXY: http://127.0.0.1:10809
+        HTTPS_PROXY: http://127.0.0.1:10809
+        ALL_PROXY: socks5://127.0.0.1:10808
+        SOCKS_PROXY: socks5://127.0.0.1:10808
+    steps:
+      - name: Show NPU info
+        run: |
+          npu-smi info
+
+      - name: Config mirrors
+        run: |
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+      - name: Install system dependencies
+        run: |
+          apt update
+          apt install --no-install-recommends -y \
+              git gcc g++ make cmake ninja-build curl \
+              libgl1 libglib2.0-0 libsndfile1
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # TODO(shink): Update once PR merged
+      # https://github.com/pytorch/benchmark/pull/2550
+      - name: Checkout benchmark
+        uses: actions/checkout@v4
+        with:
+          repository: shink/benchmark
+          ref: feat/test_bench/continue_on_error
+          path: benchmark
+
+      - name: Download ${{ inputs.artifact_name }}
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.artifact_name }}
+          path: ascend_npu
+
+      - name: Install torch_npu
+        working-directory: ascend_npu
+        run: |
+          pip install ${{ inputs.artifact_name }}
+
+      - name: Install torch_npu dependencies
+        working-directory: ascend_npu
+        run: |
+          curl -fsSL -O https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt
+          pip install -r requirements.txt
+
+      - name: Install benchmark dependencies
+        run: |
+          pip install -r benchmark/requirements.txt --constraint ascend_npu/requirements.txt "numpy==1.*"
+          python benchmark/install.py --userbenchmark test_bench --continue_on_fail
+
+      - name: Install project dependencies
+        run: |
+          pip install -r requirements.txt
+
+      - name: Show environment info
+        run: |
+          npu_is_available=$(python -c "import torch; print(torch.npu.is_available())")
+          npu_count=$(python -c "import torch; print(torch.npu.device_count())")
+          echo "NPU is available: ${npu_is_available}"
+          echo "NPU count: ${npu_count}"
+          pip list | grep -E 'torch|numpy'
+
+      - name: Run benchmarks
+        working-directory: benchmark
+        run: |
+          python run_benchmark.py test_bench --accuracy --device npu --test eval \
+              --output ascend_npu_benchmark.json
+
+      - name: Upload the output file
+        id: upload-output
+        uses: actions/upload-artifact@v4
+        with:
+          name: ascend_npu_benchmark.json
+          path: benchmark/ascend_npu_benchmark.json
+          if-no-files-found: error
+          retention-days: 1
+          overwrite: true
+
+      - name: Write to workflow job summary
+        id: report
+        run: |
+          # .ci/benchmark.py imports the "src" package from the repository root,
+          # which is the working directory of this step.
+          export PYTHONPATH="${PWD}${PYTHONPATH:+:${PYTHONPATH}}"
+          python .ci/benchmark.py "$(realpath benchmark/ascend_npu_benchmark.json)"
+
+      # TODO(shink)
+      - name: Update README.md
+        if: ${{ github.event_name == 'push' }}
+        run: |
+          echo "${{ github.event_name }}"
+          echo "${{ github.event_name == 'push' }}"
diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml
index dfcfc5e..249fbc2 100644
--- a/.github/workflows/_ascend_npu_build.yml
+++ b/.github/workflows/_ascend_npu_build.yml
@@ -29,6 +29,13 @@ jobs:
     runs-on: ${{ inputs.runner }}
     container:
       image: ${{ inputs.image }}
+      options: >-
+        --network host
+      env:
+        HTTP_PROXY: http://127.0.0.1:10809
+        HTTPS_PROXY: http://127.0.0.1:10809
+        ALL_PROXY: socks5://127.0.0.1:10808
+        SOCKS_PROXY: socks5://127.0.0.1:10808
     outputs:
       dist_name: ${{ steps.list-dist.outputs.dist_name }}
     steps:
diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_test.yml
index 37932ce..fcfcac8 100644
--- a/.github/workflows/_ascend_npu_test.yml
+++ b/.github/workflows/_ascend_npu_test.yml
@@ -29,7 +29,7 @@ defaults:
 
 jobs:
   test:
-    name: test torch_npu in ${{ inputs.image }} with ${{ inputs.device }}
+    name: test torch_npu
     runs-on: ${{ inputs.runner }}
     container:
       image: ${{ inputs.image }}
@@ -45,6 +45,11 @@ jobs:
         --device /dev/davinci_manager
         --device /dev/devmm_svm
         --device /dev/hisi_hdc
+      env:
+        HTTP_PROXY: http://127.0.0.1:10809
+        HTTPS_PROXY: http://127.0.0.1:10809
+        ALL_PROXY: socks5://127.0.0.1:10808
+        SOCKS_PROXY: socks5://127.0.0.1:10808
     steps:
       - name: Show NPU info
         run: |
@@ -67,7 +72,6 @@ jobs:
           # repository: Ascend/pytorch
           repository: shink/torchnpu
           ref: feat/autoload
-          submodules: recursive
           path: torch_npu
 
       - name: Download distribution artifact
diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml
index f0e226b..5d60ee9 100644
--- a/.github/workflows/ascend_npu_test.yml
+++ b/.github/workflows/ascend_npu_test.yml
@@ -48,7 +48,7 @@ on:
           - /dev/davinci6
           - /dev/davinci7
           - /dev/davinci8
-        default: '/dev/davinci6'
+        default: '/dev/davinci5'
         description: 'The device selected to run on'
 
 # Only cancel the previous runs when triggered by a pull request
@@ -70,7 +70,7 @@ jobs:
         run: |
           set -e
           echo "runner=${{ github.event.inputs.runner || 'self-hosted' }}" >> $GITHUB_OUTPUT
-          echo "device=${{ github.event.inputs.device || '/dev/davinci6' }}" >> $GITHUB_OUTPUT
+          echo "device=${{ github.event.inputs.device || '/dev/davinci5' }}" >> $GITHUB_OUTPUT
           echo "image=${{ github.event.inputs.image || 'ascendai/cann:latest' }}" >> $GITHUB_OUTPUT
 
   build:
@@ -93,3 +93,16 @@ jobs:
       image: ${{ needs.prepare.outputs.image }}
       device: ${{ needs.prepare.outputs.device }}
       artifact_name: ${{ needs.build.outputs.artifact_name }}
+
+  benchmark:
+    name: Run benchmarks
+    needs:
+      - prepare
+      - build
+      - test
+    uses: ./.github/workflows/_ascend_npu_benchmark.yml
+    with:
+      runner: ${{ needs.prepare.outputs.runner }}
+      image: ${{ needs.prepare.outputs.image }}
+      device: ${{ needs.prepare.outputs.device }}
+      artifact_name: ${{ needs.build.outputs.artifact_name }}
diff --git a/.gitignore b/.gitignore
index 485dee6..2e54bfb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 .idea
+.benchmarks
+__pycache__
diff --git a/README.md b/README.md
index 73a9037..f30065e 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,113 @@ across various devices by running comprehensive GitHub workflows.
 
 ## Accelerator Integration Test Results
 
-| [torch_npu][1]                   |
-|----------------------------------|
-| [![Ascend NPU Test Suite][2]][3] | 
+<!-- Start -->
+
+|                                 | [torch_npu][1] |
+|---------------------------------|----------------|
+| simple_gpt                      | ❌              |
+| detectron2_fasterrcnn_r_50_dc5  | ❌              |
+| LearningToPaint                 | ✅              |
+| hf_GPT2_large                   | ✅              |
+| dcgan                           | ✅              |
+| nanogpt                         | ✅              |
+| fastNLP_Bert                    | ✅              |
+| moondream                       | ❌              |
+| mobilenet_v2_quantized_qat      | ❌              |
+| functorch_dp_cifar10            | ✅              |
+| simple_gpt_tp_manual            | ❌              |
+| speech_transformer              | ✅              |
+| yolov3                          | ✅              |
+| resnet50_quantized_qat          | ❌              |
+| sam_fast                        | ❌              |
+| alexnet                         | ✅              |
+| timm_efficientnet               | ✅              |
+| pyhpc_isoneutral_mixing         | ✅              |
+| basic_gnn_edgecnn               | ✅              |
+| nvidia_deeprecommender          | ❌              |
+| opacus_cifar10                  | ✅              |
+| dlrm                            | ✅              |
+| hf_Bert                         | ✅              |
+| hf_T5_generate                  | ✅              |
+| resnet50                        | ✅              |
+| hf_BigBird                      | ✅              |
+| resnext50_32x4d                 | ✅              |
+| pyhpc_turbulent_kinetic_energy  | ✅              |
+| llama                           | ✅              |
+| detectron2_maskrcnn_r_50_c4     | ❌              |
+| Super_SloMo                     | ✅              |
+| moco                            | ❌              |
+| stable_diffusion_unet           | ❌              |
+| microbench_unbacked_tolist_sum  | ✅              |
+| detectron2_maskrcnn_r_101_c4    | ❌              |
+| hf_distil_whisper               | ✅              |
+| mnasnet1_0                      | ✅              |
+| detectron2_fasterrcnn_r_50_fpn  | ❌              |
+| timm_resnest                    | ✅              |
+| hf_GPT2                         | ✅              |
+| squeezenet1_1                   | ✅              |
+| basic_gnn_gin                   | ✅              |
+| hf_clip                         | ✅              |
+| mobilenet_v2                    | ✅              |
+| drq                             | ✅              |
+| hf_Roberta_base                 | ✅              |
+| detectron2_maskrcnn_r_50_fpn    | ❌              |
+| timm_nfnet                      | ✅              |
+| timm_vovnet                     | ✅              |
+| doctr_det_predictor             | ✅              |
+| sam                             | ✅              |
+| hf_T5_large                     | ✅              |
+| mobilenet_v3_large              | ✅              |
+| detectron2_fcos_r_50_fpn        | ❌              |
+| soft_actor_critic               | ✅              |
+| llava                           | ❌              |
+| timm_regnet                     | ✅              |
+| functorch_maml_omniglot         | ✅              |
+| detectron2_fasterrcnn_r_101_c4  | ❌              |
+| hf_DistilBert                   | ✅              |
+| tts_angular                     | ✅              |
+| detectron2_maskrcnn             | ❌              |
+| basic_gnn_sage                  | ✅              |
+| tacotron2                       | ❌              |
+| detectron2_maskrcnn_r_101_fpn   | ❌              |
+| lennard_jones                   | ✅              |
+| pytorch_unet                    | ✅              |
+| vgg16                           | ✅              |
+| BERT_pytorch                    | ✅              |
+| timm_efficientdet               | ❌              |
+| pyhpc_equation_of_state         | ✅              |
+| maml                            | ✅              |
+| detectron2_fasterrcnn_r_50_c4   | ❌              |
+| resnet152                       | ✅              |
+| phlippe_densenet                | ✅              |
+| maml_omniglot                   | ✅              |
+| phlippe_resnet                  | ✅              |
+| pytorch_CycleGAN_and_pix2pix    | ✅              |
+| hf_Whisper                      | ✅              |
+| hf_T5                           | ✅              |
+| densenet121                     | ✅              |
+| cm3leon_generate                | ✅              |
+| detectron2_fasterrcnn_r_101_fpn | ❌              |
+| hf_Bert_large                   | ✅              |
+| stable_diffusion_text_encoder   | ❌              |
+| hf_Reformer                     | ❌              |
+| detectron2_fasterrcnn_r_101_dc5 | ❌              |
+| demucs                          | ✅              |
+| pytorch_stargan                 | ✅              |
+| hf_T5_base                      | ✅              |
+| torch_multimodal_clip           | ✅              |
+| vision_maskrcnn                 | ❌              |
+| timm_vision_transformer_large   | ✅              |
+| hf_Bart                         | ✅              |
+| shufflenet_v2_x1_0              | ✅              |
+| llama_v2_7b_16h                 | ❌              |
+| basic_gnn_gcn                   | ✅              |
+| resnet18                        | ✅              |
+| Background_Matting              | ✅              |
+| doctr_reco_predictor            | ✅              |
+| timm_vision_transformer         | ✅              |
+| hf_Albert                       | ✅              |
+| hf_Longformer                   | ✅              |
 
 [1]: https://github.com/ascend/pytorch
 
@@ -17,6 +121,8 @@ across various devices by running comprehensive GitHub workflows.
 
 [3]: https://github.com/cosdt/pytorch-integration-tests/actions/workflows/ascend_npu_test.yml
 
+<!-- End -->
+
 ## Overview
 
 This repository contains workflows and scripts that automate the testing
diff --git a/ascend_npu/README.md b/ascend_npu/README.md
new file mode 100644
index 0000000..ba18d81
--- /dev/null
+++ b/ascend_npu/README.md
@@ -0,0 +1,2 @@
+# Huawei Ascend NPU
+
diff --git a/ascend_npu/metadata.json b/ascend_npu/metadata.json
new file mode 100644
index 0000000..f41a975
--- /dev/null
+++ b/ascend_npu/metadata.json
@@ -0,0 +1,11 @@
+{
+  "device": "npu",
+  "test": [
+    "train",
+    "eval"
+  ],
+  "models": [
+    "BERT_pytorch",
+    "hf_GPT2"
+  ]
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a5d5159
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+tabulate
diff --git a/src/benchmark/__init__.py b/src/benchmark/__init__.py
new file mode 100644
index 0000000..16281fe
--- /dev/null
+++ b/src/benchmark/__init__.py
@@ -0,0 +1 @@
+from .utils import *
diff --git a/src/benchmark/utils.py b/src/benchmark/utils.py
new file mode 100644
index 0000000..b50968a
--- /dev/null
+++ b/src/benchmark/utils.py
@@ -0,0 +1,153 @@
+"""Helpers that turn a TorchBench test_bench output file into a report."""
+
+import ast
+import dataclasses
+import json
+import pathlib
+import re
+from typing import Any, Dict, List, Optional
+
+
+# Same as torchbenchmark.util.experiment.instantiator.TorchBenchModelConfig
+# https://github.com/pytorch/benchmark/blob/main/torchbenchmark/util/experiment/instantiator.py#L26
+@dataclasses.dataclass
+class TorchBenchModelConfig:
+    name: str
+    test: str
+    device: str
+    batch_size: Optional[int]
+    extra_args: List[str]
+    extra_env: Optional[Dict[str, str]] = None
+    output_dir: Optional[pathlib.Path] = None
+
+
+@dataclasses.dataclass
+class TorchBenchModelMetric:
+    """One entry of the ``metrics`` mapping: the parsed key and its raw value."""
+
+    key: TorchBenchModelConfig
+    value: str
+
+
+# A ", " only starts a new entry when "name=" follows it, so the ", " inside a
+# value such as "extra_args=['--a', '--b']" does not split that entry.
+_ENTRY_SEPARATOR = re.compile(r", (?=\w+=)")
+
+
+def read_json(path: str) -> dict:
+    """Load the JSON document stored at ``path``."""
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def save_file(path: str, data) -> None:
+    """Write the text ``data`` to ``path``, replacing any existing content."""
+    with open(path, "w", encoding="utf-8") as file:
+        file.write(data)
+
+
+def parse_to_dict(config_str: str) -> Dict[str, Any]:
+    """
+    Parse a string (like 'key1=value1, key2=value2, ...') into a dict.
+
+    Values that are Python literals (``None``, numbers, lists, ...) are
+    converted with ``ast.literal_eval``; any other value stays a string.
+    An empty string yields an empty dict.
+
+    Raises:
+        ValueError: if an entry does not contain ``=``.
+    """
+    config: Dict[str, Any] = {}
+    if not config_str.strip():
+        return config
+
+    for item in _ENTRY_SEPARATOR.split(config_str):
+        key, separator, value = item.partition("=")
+        if not separator:
+            raise ValueError(f"Malformed entry {item!r} in {config_str!r}")
+        try:
+            config[key] = ast.literal_eval(value)
+        except (ValueError, SyntaxError):
+            config[key] = value
+
+    return config
+
+
+def read_metrics(path: str, *, metric=None) -> List[TorchBenchModelMetric]:
+    """
+    Read the ``metrics`` mapping of the JSON file at ``path``.
+
+    Args:
+        path: the benchmark output file.
+        metric: when given, keep only entries whose key has this ``metric``
+            value; when ``None``, keep every entry.
+    """
+    metrics_data = read_json(path).get("metrics") or {}
+
+    metrics = []
+    for metric_key, metric_value in metrics_data.items():
+        key_dict = parse_to_dict(metric_key)
+        # Entries without a "metric" field never match a requested metric.
+        if metric is not None and key_dict.get("metric") != metric:
+            continue
+        config = TorchBenchModelConfig(
+            name=key_dict.get("model"),
+            test=key_dict.get("test"),
+            device=key_dict.get("device"),
+            # The sample output in test/benchmark spells the batch size "bs".
+            batch_size=key_dict.get("bs", key_dict.get("batch_size")),
+            extra_args=key_dict.get("extra_args"),
+            extra_env=key_dict.get("extra_env"),
+            output_dir=key_dict.get("output_dir"),
+        )
+        metrics.append(TorchBenchModelMetric(config, metric_value))
+    return metrics
+
+
+def _sorted_unique(values) -> list:
+    """Return the distinct ``values`` in a stable, case-insensitive order."""
+    return sorted(set(values), key=lambda v: (str(v).casefold(), str(v)))
+
+
+def generate_table_rows(metrics: List[TorchBenchModelMetric]):
+    """
+    Build the header and rows of a model-by-device result table.
+
+    Rows and columns are sorted so that the same results always produce the
+    same table. A cell is "✅" when the value is "pass", "❌" for any other
+    value and empty when there is no result for that model and device.
+
+    Returns:
+        A ``(headers, rows)`` tuple; the first column holds the model name.
+    """
+    models = _sorted_unique(metric.key.name for metric in metrics)
+    devices = _sorted_unique(metric.key.device for metric in metrics)
+
+    # The first result of a (model, device) pair wins.
+    results = {}
+    for metric in metrics:
+        results.setdefault((metric.key.name, metric.key.device), metric)
+
+    rows = []
+    for model in models:
+        row = [model]
+        for device in devices:
+            metric = results.get((model, device))
+            if metric is None:
+                cell = ""
+            else:
+                cell = "✅" if metric.value == "pass" else "❌"
+            row.append(cell)
+        rows.append(row)
+
+    headers = [""] + devices
+    return headers, rows
+
+
+def to_markdown_table(metrics: List[TorchBenchModelMetric]):
+    """Render ``metrics`` as a GitHub-flavoured Markdown table."""
+    # Imported here so that only rendering requires tabulate.
+    from tabulate import tabulate
+
+    headers, rows = generate_table_rows(metrics)
+    return tabulate(rows, headers=headers, tablefmt="github")
diff --git a/test/benchmark/ascend_npu_benchmark.json b/test/benchmark/ascend_npu_benchmark.json
new file mode 100644
index 0000000..2316f39
--- /dev/null
+++ b/test/benchmark/ascend_npu_benchmark.json
@@ -0,0 +1,11 @@
+{
+    "name": "test_bench",
+    "environ": {
+        "pytorch_git_version": "dd2e6d61409aac22198ec771560a38adb0018ba2",
+        "pytorch_version": "2.6.0.dev20241120"
+    },
+    "metrics": {
+        "model=BERT_pytorch, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+        "model=hf_GPT2, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass"
+    }
+}
\ No newline at end of file
diff --git a/test/benchmark/test_ascend_npu.py b/test/benchmark/test_ascend_npu.py
new file mode 100644
index 0000000..f78e48d
--- /dev/null
+++ b/test/benchmark/test_ascend_npu.py
@@ -0,0 +1,29 @@
+import pathlib
+import unittest
+
+import src.benchmark.utils as utils
+
+# Resolve the fixture next to this file so the tests do not depend on the
+# current working directory.
+path = str(pathlib.Path(__file__).resolve().parent / "ascend_npu_benchmark.json")
+
+
+class TestBenchmark(unittest.TestCase):
+    def test_read_metrics(self):
+        metrics = utils.read_metrics(path, metric="accuracy")
+        self.assertEqual(len(metrics), 2)
+        for metric in metrics:
+            self.assertEqual(metric.key.device, "npu")
+            self.assertEqual(metric.value, "pass")
+
+    def test_to_markdown_table(self):
+        metrics = utils.read_metrics(path, metric="accuracy")
+        markdown_table = utils.to_markdown_table(metrics)
+        self.assertIsNotNone(markdown_table)
+        # Every model of the fixture must show up in the rendered table.
+        for model in ("BERT_pytorch", "hf_GPT2"):
+            self.assertIn(model, markdown_table)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/gpt2_test.py b/test/gpt2_test.py
deleted file mode 100644
index a35650d..0000000
--- a/test/gpt2_test.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import os
-
-import torch
-import torch_npu
-from transformers import GPT2Tokenizer, GPT2LMHeadModel, TrainingArguments, Trainer
-from datasets import load_dataset
-from transformers import DataCollatorForLanguageModeling
-
-
-# 固定随机种子
-def set_seed(seed=42):
-    torch.manual_seed(seed)
-    if torch.npu.is_available():
-        torch.npu.manual_seed_all(seed)
-
-
-# 训练并比较 CPU 和 GPU 的训练损失
-def train_and_compare_gpt2(model_name):
-    set_seed()
-
-    def train_on_device(use_cpu=False):
-        # 加载 GPT-2 模型和 tokenizer
-        model = GPT2LMHeadModel.from_pretrained(model_name)
-        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-        tokenizer.pad_token = tokenizer.eos_token  # GPT-2 没有 pad_token，需要将 eos_token 作为 pad_token
-
-        # 加载 wikitext-2 数据集
-        train_dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train', verification_mode="no_checks")
-        val_dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='validation', verification_mode="no_checks")
-
-        def preprocess_function(examples):
-            return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=128)
-
-        train_dataset = train_dataset.map(preprocess_function, batched=True)
-        val_dataset = val_dataset.map(preprocess_function, batched=True)
-
-        train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
-        val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
-
-        # 设置训练参数
-        training_args = TrainingArguments(
-            output_dir='./results',
-            per_device_train_batch_size=4,
-            per_device_eval_batch_size=4,
-            num_train_epochs=1,
-            logging_dir='./logs',
-            logging_steps=10,
-            eval_strategy='epoch',
-            save_strategy='epoch',
-            report_to="none",
-            use_cpu=use_cpu
-        )
-
-        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
-
-        # 创建 Trainer
-        trainer = Trainer(
-            data_collator=data_collator,
-            model=model,
-            args=training_args,
-            train_dataset=train_dataset,
-            eval_dataset=val_dataset
-        )
-
-        # 训练模型
-        trainer.train()
-
-        # 评估模型
-        metrics = trainer.evaluate()
-
-        # 返回评估损失
-        return metrics['eval_loss']
-
-    # 在 GPU 上训练（如果有 GPU）
-    if torch.npu.is_available():
-        print(f"Training on NPU")
-        gpu_loss = train_on_device(False)
-        print(f"GPU Training Loss: {gpu_loss:.4f}")
-    else:
-        gpu_loss = None
-        print("No GPU available for training.")
-
-    # 在 CPU 上训练
-    if os.getenv("IS_CI"):
-        # Skip training when running in CI because it's too slow
-        cpu_loss = 3.0
-    else:
-        print(f"Training on CPU")
-        cpu_loss = train_on_device(True)
-
-    print(f"CPU Training Loss: {cpu_loss:.4f}")
-
-    return cpu_loss, gpu_loss
-
-
-# 推理并比较 CPU 和 GPU 的推理损失
-def infer_and_compare_gpt2(model_name):
-    set_seed()
-
-    def infer_on_device(device: torch.device):
-        # 加载 GPT-2 模型和 tokenizer
-        model = GPT2LMHeadModel.from_pretrained(model_name).to(device)
-        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-
-        # 设置 pad_token 为 eos_token
-        tokenizer.pad_token = tokenizer.eos_token
-
-        # 推理测试句子
-        test_sentence = "The quick brown fox jumps over the lazy dog."
-        inputs = tokenizer(test_sentence, return_tensors="pt", padding=True, truncation=True).to(device)
-
-        with torch.no_grad():
-            outputs = model(**inputs, labels=inputs["input_ids"])
-
-        # 计算损失
-        loss = outputs.loss.item()
-        return loss
-
-    # 在 GPU 上推理（如果有 GPU）
-    if torch.npu.is_available():
-        gpu_device = torch.device('npu')
-        gpu_loss = infer_on_device(gpu_device)
-        print(f"GPU Inference Loss: {gpu_loss:.4f}")
-    else:
-        gpu_loss = None
-        print("No GPU available for inference.")
-
-    # 在 CPU 上推理
-    cpu_device = torch.device('cpu')
-    cpu_loss = infer_on_device(cpu_device)
-
-    print(f"CPU Inference Loss: {cpu_loss:.4f}")
-
-    return cpu_loss, gpu_loss
-
-
-# 主函数
-if __name__ == "__main__":
-    model_name = "gpt2"
-
-    # 训练并比较训练损失
-    print("Comparing Training Loss:")
-    cpu_train_loss, gpu_train_loss = train_and_compare_gpt2(model_name)
-
-    # 推理并比较推理损失
-    print("\nComparing Inference Loss:")
-    cpu_infer_loss, gpu_infer_loss = infer_and_compare_gpt2(model_name)
diff --git a/test/requirements.txt b/test/requirements.txt
deleted file mode 100644
index 3f50646..0000000
--- a/test/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-transformers==4.44.2
-datasets==2.21.0