diff --git a/.devcontainer/build_cxx.sh b/.devcontainer/build_cxx.sh
index 0d7d62d2ed..33a9f890ef 100755
--- a/.devcontainer/build_cxx.sh
+++ b/.devcontainer/build_cxx.sh
@@ -13,7 +13,7 @@ cmake -D ENABLE_TENSORFLOW=ON \
 	-D ENABLE_PYTORCH=ON \
 	-D ENABLE_PADDLE=ON \
 	-D CMAKE_INSTALL_PREFIX=${SCRIPT_PATH}/../dp/ \
-	-D LAMMPS_VERSION=stable_22Jul2025_update1 \
+	-D LAMMPS_VERSION=stable_22Jul2025_update2 \
 	-D CMAKE_BUILD_TYPE=Debug \
 	-D BUILD_TESTING:BOOL=TRUE \
 	-D TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 5855aef6c5..e30369fcea 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,3 +5,8 @@ updates:
     schedule:
       interval: "weekly"
     target-branch: "devel"
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+    target-branch: "devel"
diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml
index 81f0ed01be..ac81347450 100644
--- a/.github/workflows/build_cc.yml
+++ b/.github/workflows/build_cc.yml
@@ -29,14 +29,13 @@ jobs:
         - variant: clang
           dp_variant: clang
     steps:
-    - uses: actions/checkout@v5
+    - uses: actions/checkout@v6
     - uses: actions/setup-python@v6
       with:
         python-version: '3.11'
     - uses: lukka/get-cmake@latest
     - run: python -m pip install uv
-    - run: source/install/uv_with_retry.sh pip install --system tensorflow
-    - run: source/install/uv_with_retry.sh pip install --system 'torch==2.8.*' --index-url https://download.pytorch.org/whl/cpu
+    - run: source/install/uv_with_retry.sh pip install --system --group pin_tensorflow_cpu --group pin_pytorch_cpu --torch-backend cpu
     - run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
          && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 21b0319c56..e50650f08a 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -37,7 +37,7 @@ jobs:
             cuda_version: 11.8
             dp_pkg_name: deepmd-kit-cu11
           # macos-x86-64
-          - os: macos-13
+          - os: macos-15-intel
             python: 311
             platform_id: macosx_x86_64
             dp_variant: cpu
@@ -57,7 +57,7 @@ jobs:
             platform_id: manylinux_aarch64
             dp_variant: cpu
     steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
         with:
           # https://github.com/pypa/setuptools_scm/issues/480
           fetch-depth: 0
@@ -73,7 +73,7 @@ jobs:
           rm -rf .git
         if: matrix.dp_pkg_name == 'deepmd-kit-cu11'
       - name: Build wheels
-        uses: pypa/cibuildwheel@v3.1
+        uses: pypa/cibuildwheel@v3.3
         env:
           CIBW_BUILD_VERBOSITY: 1
           CIBW_ARCHS: all
@@ -82,7 +82,7 @@ jobs:
           CUDA_VERSION: ${{ matrix.cuda_version }}
           DP_PKG_NAME: ${{ matrix.dp_pkg_name }}
           CIBW_BUILD_FRONTEND: 'build[uv]'
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v5
         with:
           name: cibw-cp${{ matrix.python }}-${{ matrix.platform_id }}-cu${{ matrix.cuda_version }}-${{ strategy.job-index }}
           path: ./wheelhouse/*.whl
@@ -90,13 +90,13 @@ jobs:
     name: Build source distribution
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
       - name: Build sdist
         run: pipx run uv tool run --with build[uv] --from build python -m build --installer uv --sdist
 
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v5
         with:
           name: cibw-sdist
           path: dist/*.tar.gz
@@ -109,7 +109,7 @@ jobs:
       id-token: write
     if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
     steps:
-      - uses: actions/download-artifact@v5
+      - uses: actions/download-artifact@v6
         with:
           pattern: cibw-*
           path: dist
@@ -131,8 +131,8 @@ jobs:
     steps:
       - name: Delete huge unnecessary tools folder
         run: rm -rf /opt/hostedtoolcache
-      - uses: actions/checkout@v5
-      - uses: actions/download-artifact@v5
+      - uses: actions/checkout@v6
+      - uses: actions/download-artifact@v6
         with:
           path: source/install/docker/dist
           pattern: cibw-*-manylinux_x86_64-cu${{ matrix.cuda_version }}*
@@ -165,7 +165,7 @@ jobs:
     needs: [build_wheels, build_sdist]
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/download-artifact@v5
+      - uses: actions/download-artifact@v6
         with:
           path: dist/packages
           pattern: cibw-*
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 2caf615852..7460fd7b22 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -30,7 +30,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v5
+      uses: actions/checkout@v6
     - uses: actions/setup-python@v6
       with:
         python-version: '3.11'
@@ -43,13 +43,13 @@ jobs:
          && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
          && sudo apt-get update \
          && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2
-        python -m pip install tensorflow
-        python -m pip install 'torch==2.8.*' --index-url https://download.pytorch.org/whl/cpu
+        python -m pip install uv
+        source/install/uv_with_retry.sh pip install --system --group pin_tensorflow_cpu --group pin_pytorch_cpu --torch-backend cpu
       env:
         DEBIAN_FRONTEND: noninteractive
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v3
+      uses: github/codeql-action/init@v4
       with:
         languages: ${{ matrix.language }}
         queries: security-extended,security-and-quality
@@ -62,6 +62,6 @@ jobs:
       if: matrix.language == 'c-cpp'
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v3
+      uses: github/codeql-action/analyze@v4
       with:
         category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml
index 0468501433..51db243c1c 100644
--- a/.github/workflows/copilot-setup-steps.yml
+++ b/.github/workflows/copilot-setup-steps.yml
@@ -30,7 +30,7 @@ jobs:
     # If you do not check out your code, Copilot will do this for you.
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@v6
 
       - name: Set up Python
         uses: actions/setup-python@v6
@@ -38,7 +38,7 @@ jobs:
           python-version: "3.10"
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
         with:
           enable-cache: true
 
@@ -49,10 +49,7 @@ jobs:
         run: echo "VIRTUAL_ENV=$PWD/venv" >> $GITHUB_ENV && echo "$PWD/venv/bin" >> $GITHUB_PATH
 
       - name: Install base dependencies
-        run: uv pip install tensorflow-cpu
-
-      - name: Install PyTorch
-        run: uv pip install torch --index-url https://download.pytorch.org/whl/cpu
+        run: uv pip install --group pin_tensorflow_cpu --group pin_pytorch_cpu --torch-backend cpu
 
       - name: Build Python package
         run: uv pip install -e .[cpu,test]
diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml
index dae85816f5..3b37c084b2 100644
--- a/.github/workflows/package_c.yml
+++ b/.github/workflows/package_c.yml
@@ -28,7 +28,17 @@ jobs:
             tensorflow_version: ">=2.5.0,<2.15"
             filename: libdeepmd_c_cu11.tar.gz
     steps:
-      - uses: actions/checkout@v5
+      - name: Free Disk Space (Ubuntu)
+        uses: insightsengineering/disk-space-reclaimer@v1
+        with:
+          tools-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+          docker-images: true
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
       - name: Package C library
@@ -40,7 +50,7 @@ jobs:
         if: matrix.filename != 'libdeepmd_c.tar.gz'
       # for download and debug
       - name: Upload artifact
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: libdeepmd_c-${{ strategy.job-index }}-${{ matrix.filename }}
           path: ${{ matrix.filename }}
@@ -56,9 +66,9 @@ jobs:
     needs: [build_c]
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
       - name: Download artifact
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           pattern: libdeepmd_c-*
           merge-multiple: true
diff --git a/.github/workflows/suppr.txt b/.github/workflows/suppr.txt
index 17e8c743a0..2a43b31eb3 100644
--- a/.github/workflows/suppr.txt
+++ b/.github/workflows/suppr.txt
@@ -1 +1,5 @@
 leak:libpaddle_inference
+# TensorFlow 2.20
+leak:xla::
+leak:mlir::
+leak:llvm::
diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml
index 956090fe0c..32155cc309 100644
--- a/.github/workflows/test_cc.yml
+++ b/.github/workflows/test_cc.yml
@@ -17,9 +17,25 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        check_memleak: [true, false]
+        include:
+          - check_memleak: true
+            enable_tensorflow: true
+            enable_pytorch: true
+            enable_paddle: false
+          - check_memleak: true
+            enable_tensorflow: false
+            enable_pytorch: false
+            enable_paddle: true
+          - check_memleak: false
+            enable_tensorflow: true
+            enable_pytorch: true
+            enable_paddle: false
+          - check_memleak: false
+            enable_tensorflow: false
+            enable_pytorch: false
+            enable_paddle: true
     steps:
-    - uses: actions/checkout@v5
+    - uses: actions/checkout@v6
     - uses: actions/setup-python@v6
       with:
         python-version: '3.11'
@@ -28,10 +44,9 @@ jobs:
     - run: python -m pip install uv
     - name: Install Python dependencies
       run: |
-        source/install/uv_with_retry.sh pip install --system tensorflow-cpu~=2.18.0 jax==0.5.0
+        source/install/uv_with_retry.sh pip install --system --group pin_tensorflow_cpu --group pin_pytorch_cpu --group pin_jax --torch-backend cpu
         export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
         source/install/uv_with_retry.sh pip install --system -e .[cpu,test,lmp,jax] mpi4py mpich
-        source/install/uv_with_retry.sh pip install --system 'torch==2.8.*' --index-url https://download.pytorch.org/whl/cpu
     - name: Convert models
       run: source/tests/infer/convert-models.sh
     # https://github.com/actions/runner-images/issues/9491
@@ -47,11 +62,16 @@ jobs:
         CMAKE_GENERATOR: Ninja
         CXXFLAGS: ${{ matrix.check_memleak && '-fsanitize=leak' || '' }}
         LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/workflows/suppr.txt
+        ENABLE_TENSORFLOW: ${{ matrix.enable_tensorflow && 'TRUE' || 'FALSE' }}
+        ENABLE_PYTORCH: ${{ matrix.enable_pytorch && 'TRUE' || 'FALSE' }}
+        ENABLE_PADDLE: ${{ matrix.enable_paddle && 'TRUE' || 'FALSE' }}
     # test lammps
     - run: |
         cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/paddle/lib/*.so ${{ github.workspace }}/dp_test/lib/
         cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/onednn/lib/* ${{ github.workspace }}/dp_test/lib/
         cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/mklml/lib/* ${{ github.workspace }}/dp_test/lib/
+      if: matrix.enable_paddle
+    - run: |
         export LD_LIBRARY_PATH=${{ github.workspace }}/dp_test/lib:$LD_LIBRARY_PATH
         pytest --cov=deepmd source/lmp/tests
       env:
@@ -59,19 +79,24 @@ jobs:
         TF_INTRA_OP_PARALLELISM_THREADS: 1
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
+        ENABLE_TENSORFLOW: ${{ matrix.enable_tensorflow && '1' || '0' }}
+        ENABLE_PYTORCH: ${{ matrix.enable_pytorch && '1' || '0' }}
+        ENABLE_JAX: ${{ matrix.enable_tensorflow && '1' || '0' }}
+        ENABLE_PADDLE: ${{ matrix.enable_paddle && '1' || '0' }}
       if: ${{ !matrix.check_memleak }}
     # test ipi
     - run: |
         export PATH=${{ github.workspace }}/dp_test/bin:$PATH
-        cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/paddle/lib/*.so ${{ github.workspace }}/dp_test/lib/
-        cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/onednn/lib/* ${{ github.workspace }}/dp_test/lib/
-        cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/mklml/lib/* ${{ github.workspace }}/dp_test/lib/
         pytest --cov=deepmd source/ipi/tests
       env:
         OMP_NUM_THREADS: 1
         TF_INTRA_OP_PARALLELISM_THREADS: 1
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib
+        ENABLE_TENSORFLOW: ${{ matrix.enable_tensorflow && '1' || '0' }}
+        ENABLE_PYTORCH: ${{ matrix.enable_pytorch && '1' || '0' }}
+        ENABLE_JAX: ${{ matrix.enable_tensorflow && '1' || '0' }}
+        ENABLE_PADDLE: ${{ matrix.enable_paddle && '1' || '0' }}
       if: ${{ !matrix.check_memleak }}
     - uses: codecov/codecov-action@v5
       env:
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index 2523f71197..a934401a20 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -25,7 +25,7 @@ jobs:
     steps:
     - name: Make sudo and git work
       run: apt-get update && apt-get install -y sudo git
-    - uses: actions/checkout@v5
+    - uses: actions/checkout@v6
     - uses: actions/setup-python@v6
       with:
         python-version: '3.11'
@@ -43,11 +43,11 @@ jobs:
          && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
       if: false  # skip as we use nvidia image
     - run: python -m pip install -U uv
-    - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.7.0" "jax[cuda12]==0.5.0"
+    - run: source/install/uv_with_retry.sh pip install --system --group pin_tensorflow_gpu --group pin_pytorch_gpu --group pin_jax "jax[cuda12]"
     - run: |
         export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
         export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
-        pip install "paddlepaddle-gpu==3.0.0" -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
+        pip install --find-links "https://www.paddlepaddle.org.cn/packages/nightly/cu126/paddlepaddle-gpu/" --index-url https://pypi.org/simple "paddlepaddle-gpu==3.3.0.dev20251204"
         source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py --reinstall-package deepmd-kit
       env:
         DP_VARIANT: cuda
@@ -61,6 +61,7 @@ jobs:
         # See https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html
         XLA_PYTHON_CLIENT_PREALLOCATE: false
         XLA_PYTHON_CLIENT_ALLOCATOR: platform
+        FLAGS_use_stride_compute_kernel: 0
     - name: Convert models
       run: source/tests/infer/convert-models.sh
     - run: |
diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
index 81738dcfe9..541da88556 100644
--- a/.github/workflows/test_python.yml
+++ b/.github/workflows/test_python.yml
@@ -22,20 +22,17 @@ jobs:
         python: ["3.9", "3.12"]
 
     steps:
-    - uses: actions/checkout@v5
+    - uses: actions/checkout@v6
     - uses: actions/setup-python@v6
       with:
         python-version: ${{ matrix.python }}
     - run: python -m pip install -U uv
     - run: |
-        source/install/uv_with_retry.sh pip install --system openmpi tensorflow-cpu~=2.18.0
-        source/install/uv_with_retry.sh pip install --system torch -i https://download.pytorch.org/whl/cpu
+        source/install/uv_with_retry.sh pip install --system openmpi --group pin_tensorflow_cpu --group pin_pytorch_cpu --torch-backend cpu
         export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
         export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
-        source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py "jax==0.5.0;python_version>='3.10'"
-        source/install/uv_with_retry.sh pip install --system -U setuptools
-        source/install/uv_with_retry.sh pip install --system horovod --no-build-isolation
-        source/install/uv_with_retry.sh pip install --system --pre "paddlepaddle==3.0.0" -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+        source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py --group pin_jax
+        source/install/uv_with_retry.sh pip install --system --find-links "https://www.paddlepaddle.org.cn/packages/nightly/cpu/paddlepaddle/" --index-url https://pypi.org/simple paddlepaddle==3.3.0.dev20251204
       env:
         # Please note that uv has some issues with finding
         # existing TensorFlow package. Currently, it uses
@@ -63,6 +60,8 @@ jobs:
     - run: pytest --cov=deepmd source/tests  --splits 6 --group ${{ matrix.group }} --store-durations --clean-durations --durations-path=.test_durations --splitting-algorithm least_duration
       env:
         NUM_WORKERS: 0
+        DP_CI_IMPORT_PADDLE_BEFORE_TF: 1
+        FLAGS_use_stride_compute_kernel: 0
     - name: Test TF2 eager mode
       run: pytest --cov=deepmd --cov-append source/tests/consistent/io/test_io.py source/jax2tf_tests
       env:
@@ -72,7 +71,7 @@ jobs:
       if: matrix.group == 1
     - run: mv .test_durations .test_durations_${{ matrix.group }}
     - name: Upload partial durations
-      uses: actions/upload-artifact@v4
+      uses: actions/upload-artifact@v5
       with:
         name: split-${{ matrix.python }}-${{ matrix.group }}
         path: .test_durations_${{ matrix.group }}
@@ -99,7 +98,7 @@ jobs:
         key: test2-durations-combined-${{ matrix.python }}-${{ github.sha }}
         restore-keys: test2-durations-combined-${{ matrix.python }}
     - name: Download artifacts
-      uses: actions/download-artifact@v5
+      uses: actions/download-artifact@v6
       with:
         pattern: split-${{ matrix.python }}-*
         merge-multiple: true
diff --git a/.github/workflows/todo.yml b/.github/workflows/todo.yml
index edde7b4be5..abeabda73b 100644
--- a/.github/workflows/todo.yml
+++ b/.github/workflows/todo.yml
@@ -8,7 +8,7 @@ jobs:
     if: github.repository_owner == 'deepmodeling'
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v5
+    - uses: actions/checkout@v6
     - name: Run tdg-github-action
       uses: ribtoks/tdg-github-action@master
       with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9fdd20cc81..a0ae3197de 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,14 +22,14 @@ repos:
       - id: check-toml
   # Python
   - repo: https://github.com/PyCQA/isort
-    rev: 6.0.1
+    rev: 7.0.0
     hooks:
       - id: isort
         files: \.py$
         exclude: ^source/3rdparty
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.13.2
+    rev: v0.14.8
     hooks:
       - id: ruff
         args: ["--fix"]
@@ -60,7 +60,7 @@ repos:
       - id: blacken-docs
   # C++
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v21.1.2
+    rev: v21.1.7
     hooks:
       - id: clang-format
         exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$|.+\.json$)
@@ -154,7 +154,7 @@ repos:
         exclude: .pre-commit-config.yaml|source/lmp
   # customized pylint rules
   - repo: https://github.com/pylint-dev/pylint/
-    rev: v3.3.8
+    rev: v4.0.4
     hooks:
       - id: pylint
         entry: env PYTHONPATH=source/checker pylint
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index cb08609c2b..1358a9678c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -145,4 +145,4 @@ Now, your PR is successfully submitted! After this PR is merged, you will automa
 
 ## Contact us
 
-E-mail: contact@deepmodeling.org
+E-mail: deepmodeling@deepmodeling.com
diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py
index 11a967b305..df52e63219 100644
--- a/backend/find_pytorch.py
+++ b/backend/find_pytorch.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import importlib
 import os
-import platform
 import site
 from functools import (
     lru_cache,
@@ -30,6 +29,10 @@
     Version,
 )
 
+from .utils import (
+    read_dependencies_from_dependency_group,
+)
+
 
 @lru_cache
 def find_pytorch() -> tuple[Optional[str], list[str]]:
@@ -108,15 +111,15 @@ def get_pt_requirement(pt_version: str = "") -> dict:
     """
     if pt_version is None:
         return {"torch": []}
-    if (
-        os.environ.get("CIBUILDWHEEL", "0") == "1"
-        and platform.system() == "Linux"
-        and platform.machine() == "x86_64"
-    ):
+    cibw_requirement = []
+    if os.environ.get("CIBUILDWHEEL", "0") == "1":
         cuda_version = os.environ.get("CUDA_VERSION", "12.2")
         if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
             # CUDA 12.2, cudnn 9
-            pt_version = "2.8.0"
+            # or CPU builds
+            cibw_requirement = read_dependencies_from_dependency_group(
+                "pin_pytorch_cpu"
+            )
         elif cuda_version in SpecifierSet(">=11,<12"):
             # CUDA 11.8, cudnn 8
             pt_version = "2.3.1"
@@ -141,6 +144,7 @@ def get_pt_requirement(pt_version: str = "") -> dict:
             # https://github.com/pytorch/pytorch/commit/7e0c26d4d80d6602aed95cb680dfc09c9ce533bc
             else "torch>=2.1.0",
             *mpi_requirement,
+            *cibw_requirement,
         ],
     }
 
diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py
index a0a1e65aca..457e7a726c 100644
--- a/backend/find_tensorflow.py
+++ b/backend/find_tensorflow.py
@@ -26,6 +26,10 @@
     SpecifierSet,
 )
 
+from .utils import (
+    read_dependencies_from_dependency_group,
+)
+
 
 @lru_cache
 def find_tensorflow() -> tuple[Optional[str], list[str]]:
@@ -91,10 +95,9 @@ def find_tensorflow() -> tuple[Optional[str], list[str]]:
             cuda_version = os.environ.get("CUDA_VERSION", "12.2")
             if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
                 # CUDA 12.2, cudnn 9
+                # or CPU builds
                 requires.extend(
-                    [
-                        "tensorflow-cpu>=2.18.0; platform_machine=='x86_64' and platform_system == 'Linux'",
-                    ]
+                    read_dependencies_from_dependency_group("pin_tensorflow_cpu")
                 )
             elif cuda_version in SpecifierSet(">=11,<12"):
                 # CUDA 11.8, cudnn 8
diff --git a/backend/read_env.py b/backend/read_env.py
index 482f9766a0..8a173513f9 100644
--- a/backend/read_env.py
+++ b/backend/read_env.py
@@ -46,15 +46,15 @@ def get_argument_from_env() -> tuple[str, list, list, dict, str, str]:
     # get variant option from the environment variables, available: cpu, cuda, rocm
     dp_variant = os.environ.get("DP_VARIANT", "cpu").lower()
     if dp_variant == "cpu" or dp_variant == "":
-        cmake_minimum_required_version = "3.16"
+        cmake_minimum_required_version = "3.25.2"
     elif dp_variant == "cuda":
-        cmake_minimum_required_version = "3.23"
+        cmake_minimum_required_version = "3.25.2"
         cmake_args.append("-DUSE_CUDA_TOOLKIT:BOOL=TRUE")
         cuda_root = os.environ.get("CUDAToolkit_ROOT")
         if cuda_root:
             cmake_args.append(f"-DCUDAToolkit_ROOT:STRING={cuda_root}")
     elif dp_variant == "rocm":
-        cmake_minimum_required_version = "3.21"
+        cmake_minimum_required_version = "3.25.2"
         cmake_args.append("-DUSE_ROCM_TOOLKIT:BOOL=TRUE")
         rocm_root = os.environ.get("ROCM_ROOT")
         if not rocm_root:
diff --git a/backend/utils.py b/backend/utils.py
new file mode 100644
index 0000000000..0769879d24
--- /dev/null
+++ b/backend/utils.py
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import sys
+from pathlib import (
+    Path,
+)
+
+from dependency_groups import (
+    resolve,
+)
+
+if sys.version_info >= (3, 11):
+    import tomllib
+else:
+    import tomli as tomllib
+
+
+def read_dependencies_from_dependency_group(group: str) -> tuple[str, ...]:
+    """
+    Read the dependencies of a dependency group from the project's pyproject.toml.
+
+    Parameters
+    ----------
+    group : str
+        The name of the dependency group.
+
+    Returns
+    -------
+    tuple[str, ...]
+        The dependencies returned by `dependency_groups.resolve` for the group.
+    """
+    # Anchor on this file (backend/utils.py) instead of the current working
+    # directory, so the lookup does not depend on where the build is invoked.
+    with (Path(__file__).resolve().parent.parent / "pyproject.toml").open("rb") as f:
+        pyproject = tomllib.load(f)
+
+    return resolve(pyproject["dependency-groups"], group)
diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py
index a380717927..10a746cbcb 100644
--- a/deepmd/dpmodel/fitting/general_fitting.py
+++ b/deepmd/dpmodel/fitting/general_fitting.py
@@ -4,6 +4,7 @@
 )
 from typing import (
     Any,
+    Callable,
     Optional,
     Union,
 )
@@ -221,6 +222,71 @@ def __init__(
             ],
         )
 
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], list[dict]], list[dict]],
+        protection: float = 1e-2,
+    ) -> None:
+        """
+        Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
+
+        Updates `fparam_avg`/`fparam_inv_std` (if `numb_fparam > 0`) and
+        `aparam_avg`/`aparam_inv_std` (if `numb_aparam > 0`) from the sampled frames.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `numpy.ndarray`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        protection : float
+            Divided-by-zero protection
+        """
+        if self.numb_fparam == 0 and self.numb_aparam == 0:
+            # skip data statistics
+            return
+        sampled = merged() if callable(merged) else merged
+        # stat fparam
+        if self.numb_fparam > 0:
+            cat_data = np.concatenate([frame["fparam"] for frame in sampled], axis=0)
+            cat_data = np.reshape(cat_data, [-1, self.numb_fparam])
+            fparam_avg = np.mean(cat_data, axis=0)
+            fparam_std = np.std(cat_data, axis=0, ddof=0)  # ddof=0 for population std
+            fparam_std = np.where(
+                fparam_std < protection,
+                np.array(protection, dtype=fparam_std.dtype),
+                fparam_std,
+            )
+            fparam_inv_std = 1.0 / fparam_std
+            self.fparam_avg = fparam_avg.astype(self.fparam_avg.dtype)
+            self.fparam_inv_std = fparam_inv_std.astype(self.fparam_inv_std.dtype)
+        # stat aparam
+        if self.numb_aparam > 0:
+            sys_sumv, sys_sumv2, sys_sumn = [], [], []
+            for ss_ in [frame["aparam"] for frame in sampled]:
+                ss = np.reshape(ss_, [-1, self.numb_aparam])
+                sys_sumv.append(np.sum(ss, axis=0))
+                sys_sumv2.append(np.sum(ss * ss, axis=0))
+                sys_sumn.append(ss.shape[0])
+            sumv = np.sum(np.stack(sys_sumv), axis=0)
+            sumv2 = np.sum(np.stack(sys_sumv2), axis=0)
+            sumn = sum(sys_sumn)
+            aparam_avg = sumv / sumn
+            # E[x^2] - E[x]^2 can round slightly below zero for (near-)constant data;
+            # clamp it so sqrt does not yield NaN, which would bypass `protection`.
+            aparam_std = np.sqrt(np.maximum(sumv2 / sumn - aparam_avg**2, 0.0))
+            aparam_std = np.where(
+                aparam_std < protection,
+                np.array(protection, dtype=aparam_std.dtype),
+                aparam_std,
+            )
+            aparam_inv_std = 1.0 / aparam_std
+            self.aparam_avg = aparam_avg.astype(self.aparam_avg.dtype)
+            self.aparam_inv_std = aparam_inv_std.astype(self.aparam_inv_std.dtype)
+
     @abstractmethod
     def _net_out_dim(self) -> int:
         """Set the FittingNet output dim."""
diff --git a/deepmd/dpmodel/infer/deep_eval.py b/deepmd/dpmodel/infer/deep_eval.py
index b307f2f15b..f460c6062e 100644
--- a/deepmd/dpmodel/infer/deep_eval.py
+++ b/deepmd/dpmodel/infer/deep_eval.py
@@ -137,7 +137,7 @@ def model_type(self) -> type["DeepEvalWrapper"]:
             return DeepDOS
         elif "dipole" in model_output_type:
             return DeepDipole
-        elif "polar" in model_output_type:
+        elif "polar" in model_output_type or "polarizability" in model_output_type:
             return DeepPolar
         elif "wfc" in model_output_type:
             return DeepWFC
diff --git a/deepmd/env.py b/deepmd/env.py
index 2c1241a36b..7b29a338f1 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 import os
+import platform
 from configparser import (
     ConfigParser,
 )
@@ -16,6 +17,7 @@
     "GLOBAL_CONFIG",
     "GLOBAL_ENER_FLOAT_PRECISION",
     "GLOBAL_NP_FLOAT_PRECISION",
+    "LRU_CACHE_SIZE",
     "SHARED_LIB_DIR",
     "SHARED_LIB_MODULE",
     "global_float_prec",
@@ -47,6 +49,20 @@
         "DP_INTERFACE_PREC."
     )
 
+# Derive the LRU cache size from the open-file soft limit (RLIMIT_NOFILE)
+_default_lru_cache_size = 512
+LRU_CACHE_SIZE = _default_lru_cache_size
+
+if platform.system() != "Windows":
+    import resource
+
+    soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
+    safe_buffer = 128
+    if soft_limit > safe_buffer + _default_lru_cache_size:
+        LRU_CACHE_SIZE = soft_limit - safe_buffer
+    elif soft_limit != resource.RLIM_INFINITY:  # unlimited: keep the default
+        LRU_CACHE_SIZE = soft_limit // 2
+
 
 def set_env_if_empty(key: str, value: str, verbose: bool = True) -> None:
     """Set environment variable only if it is empty.
diff --git a/deepmd/jax/atomic_model/base_atomic_model.py b/deepmd/jax/atomic_model/base_atomic_model.py
index 474fcb03c7..bed75077da 100644
--- a/deepmd/jax/atomic_model/base_atomic_model.py
+++ b/deepmd/jax/atomic_model/base_atomic_model.py
@@ -3,10 +3,18 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.jax.common import (
     ArrayAPIVariable,
     to_jax_array,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.exclude_mask import (
     AtomExcludeMask,
     PairExcludeMask,
@@ -18,6 +26,8 @@ def base_atomic_model_set_attr(name: str, value: Any) -> Any:
         value = to_jax_array(value)
         if value is not None:
             value = ArrayAPIVariable(value)
+        elif Version(flax_version) >= Version("0.12.0"):
+            value = nnx.data(value)
     elif name == "pair_excl" and value is not None:
         value = PairExcludeMask(value.ntypes, value.exclude_types)
     elif name == "atom_excl" and value is not None:
diff --git a/deepmd/jax/atomic_model/linear_atomic_model.py b/deepmd/jax/atomic_model/linear_atomic_model.py
index 6ce82fa07c..a8889e4346 100644
--- a/deepmd/jax/atomic_model/linear_atomic_model.py
+++ b/deepmd/jax/atomic_model/linear_atomic_model.py
@@ -4,6 +4,10 @@
     Optional,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.atomic_model.linear_atomic_model import (
     DPZBLLinearEnergyAtomicModel as DPZBLLinearEnergyAtomicModelDP,
 )
@@ -22,8 +26,10 @@
     to_jax_array,
 )
 from deepmd.jax.env import (
+    flax_version,
     jax,
     jnp,
+    nnx,
 )
 
 
@@ -33,13 +39,19 @@ def __setattr__(self, name: str, value: Any) -> None:
         value = base_atomic_model_set_attr(name, value)
         if name == "mapping_list":
             value = [ArrayAPIVariable(to_jax_array(vv)) for vv in value]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
         elif name == "zbl_weight":
-            value = ArrayAPIVariable(to_jax_array(value))
+            # discard since it's only used in tests
+            # to fix flax.errors.TraceContextError: Cannot mutate 'FlaxModule' from different trace level
+            return
         elif name == "models":
             value = [
                 DPAtomicModel.deserialize(value[0].serialize()),
                 PairTabAtomicModel.deserialize(value[1].serialize()),
             ]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
         return super().__setattr__(name, value)
 
     def forward_common_atomic(
diff --git a/deepmd/jax/atomic_model/pairtab_atomic_model.py b/deepmd/jax/atomic_model/pairtab_atomic_model.py
index 023f4e886a..8cdd0f7ca6 100644
--- a/deepmd/jax/atomic_model/pairtab_atomic_model.py
+++ b/deepmd/jax/atomic_model/pairtab_atomic_model.py
@@ -4,6 +4,10 @@
     Optional,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.atomic_model.pairtab_atomic_model import (
     PairTabAtomicModel as PairTabAtomicModelDP,
 )
@@ -16,8 +20,10 @@
     to_jax_array,
 )
 from deepmd.jax.env import (
+    flax_version,
     jax,
     jnp,
+    nnx,
 )
 
 
@@ -29,6 +35,8 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         return super().__setattr__(name, value)
 
     def forward_common_atomic(
diff --git a/deepmd/jax/descriptor/dpa1.py b/deepmd/jax/descriptor/dpa1.py
index fef9bd5448..07695b23ed 100644
--- a/deepmd/jax/descriptor/dpa1.py
+++ b/deepmd/jax/descriptor/dpa1.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.dpa1 import DescrptBlockSeAtten as DescrptBlockSeAttenDP
 from deepmd.dpmodel.descriptor.dpa1 import DescrptDPA1 as DescrptDPA1DP
 from deepmd.dpmodel.descriptor.dpa1 import GatedAttentionLayer as GatedAttentionLayerDP
@@ -20,6 +24,10 @@
 from deepmd.jax.descriptor.base_descriptor import (
     BaseDescriptor,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.exclude_mask import (
     PairExcludeMask,
 )
@@ -58,6 +66,8 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = [
                 NeighborGatedAttentionLayer.deserialize(ii.serialize()) for ii in value
             ]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
         return super().__setattr__(name, value)
 
 
@@ -68,9 +78,13 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"embeddings", "embeddings_strip"}:
             if value is not None:
                 value = NetworkCollection.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name == "dpa1_attention":
             value = NeighborGatedAttention.deserialize(value.serialize())
         elif name == "env_mat":
diff --git a/deepmd/jax/descriptor/dpa2.py b/deepmd/jax/descriptor/dpa2.py
index 8eea324b41..8da450d2ec 100644
--- a/deepmd/jax/descriptor/dpa2.py
+++ b/deepmd/jax/descriptor/dpa2.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.dpa2 import DescrptDPA2 as DescrptDPA2DP
 from deepmd.dpmodel.utils.network import Identity as IdentityDP
 from deepmd.dpmodel.utils.network import NativeLayer as NativeLayerDP
@@ -23,6 +27,10 @@
 from deepmd.jax.descriptor.se_t_tebd import (
     DescrptBlockSeTTebd,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.network import (
     NativeLayer,
 )
@@ -39,18 +47,23 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"repinit"}:
             value = DescrptBlockSeAtten.deserialize(value.serialize())
         elif name in {"repinit_three_body"}:
             if value is not None:
                 value = DescrptBlockSeTTebd.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"repformers"}:
             value = DescrptBlockRepformers.deserialize(value.serialize())
         elif name in {"type_embedding"}:
             value = TypeEmbedNet.deserialize(value.serialize())
         elif name in {"g1_shape_tranform", "tebd_transform"}:
             if value is None:
-                pass
+                if Version(flax_version) >= Version("0.12.0"):
+                    value = nnx.data(value)
             elif isinstance(value, NativeLayerDP):
                 value = NativeLayer.deserialize(value.serialize())
             elif isinstance(value, IdentityDP):
diff --git a/deepmd/jax/descriptor/dpa3.py b/deepmd/jax/descriptor/dpa3.py
index 299afaf01d..9f734bd553 100644
--- a/deepmd/jax/descriptor/dpa3.py
+++ b/deepmd/jax/descriptor/dpa3.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.dpa3 import DescrptDPA3 as DescrptDPA3DP
 from deepmd.jax.common import (
     ArrayAPIVariable,
@@ -15,6 +19,10 @@
 from deepmd.jax.descriptor.repflows import (
     DescrptBlockRepflows,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.type_embed import (
     TypeEmbedNet,
 )
@@ -28,6 +36,8 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"repflows"}:
             value = DescrptBlockRepflows.deserialize(value.serialize())
         elif name in {"type_embedding"}:
diff --git a/deepmd/jax/descriptor/hybrid.py b/deepmd/jax/descriptor/hybrid.py
index 20fc5f838b..b76e515c54 100644
--- a/deepmd/jax/descriptor/hybrid.py
+++ b/deepmd/jax/descriptor/hybrid.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.hybrid import DescrptHybrid as DescrptHybridDP
 from deepmd.jax.common import (
     ArrayAPIVariable,
@@ -12,6 +16,10 @@
 from deepmd.jax.descriptor.base_descriptor import (
     BaseDescriptor,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 
 
 @BaseDescriptor.register("hybrid")
@@ -20,7 +28,11 @@ class DescrptHybrid(DescrptHybridDP):
     def __setattr__(self, name: str, value: Any) -> None:
         if name in {"nlist_cut_idx"}:
             value = [ArrayAPIVariable(to_jax_array(vv)) for vv in value]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
         elif name in {"descrpt_list"}:
             value = [BaseDescriptor.deserialize(vv.serialize()) for vv in value]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
 
         return super().__setattr__(name, value)
diff --git a/deepmd/jax/descriptor/repflows.py b/deepmd/jax/descriptor/repflows.py
index df137639aa..be26012a52 100644
--- a/deepmd/jax/descriptor/repflows.py
+++ b/deepmd/jax/descriptor/repflows.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.repflows import (
     DescrptBlockRepflows as DescrptBlockRepflowsDP,
 )
@@ -12,6 +16,10 @@
     flax_module,
     to_jax_array,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.exclude_mask import (
     PairExcludeMask,
 )
@@ -27,8 +35,12 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"layers"}:
             value = [RepFlowLayer.deserialize(layer.serialize()) for layer in value]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
         elif name in {"edge_embd", "angle_embd"}:
             value = NativeLayer.deserialize(value.serialize())
         elif name in {"env_mat_edge", "env_mat_angle"}:
@@ -58,8 +70,12 @@ def __setattr__(self, name: str, value: Any) -> None:
         }:
             if value is not None:
                 value = NativeLayer.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"n_residual", "e_residual", "a_residual"}:
             value = [ArrayAPIVariable(to_jax_array(vv)) for vv in value]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
         else:
             pass
         return super().__setattr__(name, value)
diff --git a/deepmd/jax/descriptor/repformers.py b/deepmd/jax/descriptor/repformers.py
index 77ca4a9a6b..5701677349 100644
--- a/deepmd/jax/descriptor/repformers.py
+++ b/deepmd/jax/descriptor/repformers.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.repformers import (
     Atten2EquiVarApply as Atten2EquiVarApplyDP,
 )
@@ -20,6 +24,10 @@
     flax_module,
     to_jax_array,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.exclude_mask import (
     PairExcludeMask,
 )
@@ -36,8 +44,12 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"layers"}:
             value = [RepformerLayer.deserialize(layer.serialize()) for layer in value]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
         elif name == "g2_embd":
             value = NativeLayer.deserialize(value.serialize())
         elif name == "env_mat":
@@ -87,21 +99,35 @@ def __setattr__(self, name: str, value: Any) -> None:
         if name in {"linear1", "linear2", "g1_self_mlp", "proj_g1g2", "proj_g1g1g2"}:
             if value is not None:
                 value = NativeLayer.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"g1_residual", "g2_residual", "h2_residual"}:
             value = [ArrayAPIVariable(to_jax_array(vv)) for vv in value]
+            if Version(flax_version) >= Version("0.12.0"):
+                value = nnx.List([nnx.data(item) for item in value])
         elif name in {"attn2g_map"}:
             if value is not None:
                 value = Atten2Map.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"attn2_mh_apply"}:
             if value is not None:
                 value = Atten2MultiHeadApply.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"attn2_lm"}:
             if value is not None:
                 value = LayerNorm.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"attn2_ev_apply"}:
             if value is not None:
                 value = Atten2EquiVarApply.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"loc_attn"}:
             if value is not None:
                 value = LocalAtten.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         return super().__setattr__(name, value)
diff --git a/deepmd/jax/descriptor/se_e2_a.py b/deepmd/jax/descriptor/se_e2_a.py
index 31c147ad9d..4d704a4b30 100644
--- a/deepmd/jax/descriptor/se_e2_a.py
+++ b/deepmd/jax/descriptor/se_e2_a.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.se_e2_a import DescrptSeAArrayAPI as DescrptSeADP
 from deepmd.jax.common import (
     ArrayAPIVariable,
@@ -12,6 +16,10 @@
 from deepmd.jax.descriptor.base_descriptor import (
     BaseDescriptor,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.exclude_mask import (
     PairExcludeMask,
 )
@@ -29,9 +37,13 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"embeddings"}:
             if value is not None:
                 value = NetworkCollection.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name == "env_mat":
             # env_mat doesn't store any value
             pass
diff --git a/deepmd/jax/descriptor/se_e2_r.py b/deepmd/jax/descriptor/se_e2_r.py
index 96ff4103dd..e5827c42af 100644
--- a/deepmd/jax/descriptor/se_e2_r.py
+++ b/deepmd/jax/descriptor/se_e2_r.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.se_r import DescrptSeR as DescrptSeRDP
 from deepmd.jax.common import (
     ArrayAPIVariable,
@@ -12,6 +16,10 @@
 from deepmd.jax.descriptor.base_descriptor import (
     BaseDescriptor,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.exclude_mask import (
     PairExcludeMask,
 )
@@ -29,9 +37,13 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"embeddings"}:
             if value is not None:
                 value = NetworkCollection.deserialize(value.serialize())
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name == "env_mat":
             # env_mat doesn't store any value
             pass
diff --git a/deepmd/jax/descriptor/se_t.py b/deepmd/jax/descriptor/se_t.py
index 029f4231fe..6d0b026c94 100644
--- a/deepmd/jax/descriptor/se_t.py
+++ b/deepmd/jax/descriptor/se_t.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.se_t import DescrptSeT as DescrptSeTDP
 from deepmd.jax.common import (
     ArrayAPIVariable,
@@ -12,6 +16,10 @@
 from deepmd.jax.descriptor.base_descriptor import (
     BaseDescriptor,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.exclude_mask import (
     PairExcludeMask,
 )
@@ -30,6 +38,8 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"embeddings"}:
             if value is not None:
                 value = NetworkCollection.deserialize(value.serialize())
diff --git a/deepmd/jax/descriptor/se_t_tebd.py b/deepmd/jax/descriptor/se_t_tebd.py
index 84e3d3f084..8e2dae782a 100644
--- a/deepmd/jax/descriptor/se_t_tebd.py
+++ b/deepmd/jax/descriptor/se_t_tebd.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.descriptor.se_t_tebd import (
     DescrptBlockSeTTebd as DescrptBlockSeTTebdDP,
 )
@@ -15,6 +19,10 @@
 from deepmd.jax.descriptor.base_descriptor import (
     BaseDescriptor,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.exclude_mask import (
     PairExcludeMask,
 )
@@ -33,6 +41,8 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         elif name in {"embeddings", "embeddings_strip"}:
             if value is not None:
                 value = NetworkCollection.deserialize(value.serialize())
diff --git a/deepmd/jax/env.py b/deepmd/jax/env.py
index 422e4ea4ce..d425d5a0e8 100644
--- a/deepmd/jax/env.py
+++ b/deepmd/jax/env.py
@@ -5,6 +5,7 @@
 
 import jax
 import jax.numpy as jnp
+from flax import __version__ as flax_version
 from flax import (
     nnx,
 )
@@ -19,6 +20,7 @@
     jax.config.update("jax_numpy_dtype_promotion", "strict")
 
 __all__ = [
+    "flax_version",
     "jax",
     "jax_export",
     "jnp",
diff --git a/deepmd/jax/fitting/fitting.py b/deepmd/jax/fitting/fitting.py
index e69bded640..f19787fada 100644
--- a/deepmd/jax/fitting/fitting.py
+++ b/deepmd/jax/fitting/fitting.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.fitting.dipole_fitting import DipoleFitting as DipoleFittingNetDP
 from deepmd.dpmodel.fitting.dos_fitting import DOSFittingNet as DOSFittingNetDP
 from deepmd.dpmodel.fitting.ener_fitting import EnergyFittingNet as EnergyFittingNetDP
@@ -17,6 +21,10 @@
     flax_module,
     to_jax_array,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.fitting.base_fitting import (
     BaseFitting,
 )
@@ -40,6 +48,8 @@ def setattr_for_general_fitting(name: str, value: Any) -> Any:
         value = to_jax_array(value)
         if value is not None:
             value = ArrayAPIVariable(value)
+        elif Version(flax_version) >= Version("0.12.0"):
+            value = nnx.data(value)
     elif name == "emask":
         value = AtomExcludeMask(value.ntypes, value.exclude_types)
     elif name == "nets":
@@ -91,4 +101,6 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         return super().__setattr__(name, value)
diff --git a/deepmd/jax/infer/deep_eval.py b/deepmd/jax/infer/deep_eval.py
index 92ed78a13e..fbd8860c0c 100644
--- a/deepmd/jax/infer/deep_eval.py
+++ b/deepmd/jax/infer/deep_eval.py
@@ -158,7 +158,7 @@ def model_type(self) -> type["DeepEvalWrapper"]:
             return DeepDOS
         elif "dipole" in model_output_type:
             return DeepDipole
-        elif "polar" in model_output_type:
+        elif "polar" in model_output_type or "polarizability" in model_output_type:
             return DeepPolar
         elif "wfc" in model_output_type:
             return DeepWFC
diff --git a/deepmd/jax/utils/exclude_mask.py b/deepmd/jax/utils/exclude_mask.py
index 18d13d9400..4ae230c8dc 100644
--- a/deepmd/jax/utils/exclude_mask.py
+++ b/deepmd/jax/utils/exclude_mask.py
@@ -3,6 +3,10 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.utils.exclude_mask import AtomExcludeMask as AtomExcludeMaskDP
 from deepmd.dpmodel.utils.exclude_mask import PairExcludeMask as PairExcludeMaskDP
 from deepmd.jax.common import (
@@ -10,6 +14,10 @@
     flax_module,
     to_jax_array,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 
 
 @flax_module
@@ -19,6 +27,8 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         return super().__setattr__(name, value)
 
 
@@ -29,4 +39,6 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         return super().__setattr__(name, value)
diff --git a/deepmd/jax/utils/network.py b/deepmd/jax/utils/network.py
index 5a42323b90..72d9f760eb 100644
--- a/deepmd/jax/utils/network.py
+++ b/deepmd/jax/utils/network.py
@@ -5,6 +5,9 @@
 )
 
 import numpy as np
+from packaging.version import (
+    Version,
+)
 
 from deepmd.dpmodel.common import (
     NativeOP,
@@ -23,6 +26,7 @@
     to_jax_array,
 )
 from deepmd.jax.env import (
+    flax_version,
     nnx,
 )
 
@@ -56,7 +60,10 @@ def __setattr__(self, name: str, value: Any) -> None:
 
 @flax_module
 class NativeNet(make_multilayer_network(NativeLayer, NativeOP)):
-    pass
+    def __setattr__(self, name: str, value: Any) -> None:
+        if name in {"layers"} and Version(flax_version) >= Version("0.12.0"):
+            value = nnx.List(value)  # NOTE(review): no per-item nnx.data here, unlike NetworkCollection - confirm
+        return super().__setattr__(name, value)
 
 
 class EmbeddingNet(make_embedding_network(NativeNet, NativeLayer)):
@@ -75,6 +82,11 @@ class NetworkCollection(NetworkCollectionDP):
         "fitting_network": FittingNet,
     }
 
+    def __setattr__(self, name: str, value: Any) -> None:
+        if name in {"_networks"} and Version(flax_version) >= Version("0.12.0"):
+            value = nnx.List([nnx.data(item) for item in value])  # flax >= 0.12 only: wrap each entry, then the list
+        return super().__setattr__(name, value)
+
 
 class LayerNorm(LayerNormDP, NativeLayer):
     pass
diff --git a/deepmd/jax/utils/type_embed.py b/deepmd/jax/utils/type_embed.py
index 30cd9f45a9..aff0a78a2c 100644
--- a/deepmd/jax/utils/type_embed.py
+++ b/deepmd/jax/utils/type_embed.py
@@ -3,12 +3,20 @@
     Any,
 )
 
+from packaging.version import (
+    Version,
+)
+
 from deepmd.dpmodel.utils.type_embed import TypeEmbedNet as TypeEmbedNetDP
 from deepmd.jax.common import (
     ArrayAPIVariable,
     flax_module,
     to_jax_array,
 )
+from deepmd.jax.env import (
+    flax_version,
+    nnx,
+)
 from deepmd.jax.utils.network import (
     EmbeddingNet,
 )
@@ -21,6 +29,8 @@ def __setattr__(self, name: str, value: Any) -> None:
             value = to_jax_array(value)
             if value is not None:
                 value = ArrayAPIVariable(value)
+            elif Version(flax_version) >= Version("0.12.0"):
+                value = nnx.data(value)
         if name in {"embedding_net"}:
             value = EmbeddingNet.deserialize(value.serialize())
         return super().__setattr__(name, value)
diff --git a/deepmd/pd/entrypoints/main.py b/deepmd/pd/entrypoints/main.py
index fe092111b1..7ce2699188 100644
--- a/deepmd/pd/entrypoints/main.py
+++ b/deepmd/pd/entrypoints/main.py
@@ -95,7 +95,7 @@ def get_trainer(
     # Initialize DDP
     world_size = dist.get_world_size()
     if world_size > 1:
-        assert paddle.version.nccl() != "0"
+        assert not paddle.core.is_compiled_with_nccl() or paddle.version.nccl() != "0"
         fleet.init(is_collective=True)
 
     def prepare_trainer_input_single(
@@ -214,7 +214,7 @@ def get_compute_device(self) -> str:
 
     def get_ngpus(self) -> int:
         """Get the number of GPUs."""
-        return paddle.device.cuda.device_count()
+        return paddle.device.device_count()
 
     def get_backend_info(self) -> dict:
         """Get backend information."""
diff --git a/deepmd/pd/model/atomic_model/base_atomic_model.py b/deepmd/pd/model/atomic_model/base_atomic_model.py
index 4f40117fb7..e4bf23bf53 100644
--- a/deepmd/pd/model/atomic_model/base_atomic_model.py
+++ b/deepmd/pd/model/atomic_model/base_atomic_model.py
@@ -515,6 +515,24 @@ def change_out_bias(
         else:
             raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode)
 
+    def compute_fitting_input_stat(
+        self,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
+    ) -> None:
+        """Hook for computing fitting input statistics (e.g. mean and stddev); a no-op in this class.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `sample_merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        """
+        pass  # intentionally empty: nothing is computed here
+
     def _get_forward_wrapper_func(self) -> Callable[..., paddle.Tensor]:
         """Get a forward wrapper of the atomic model for output bias calculation."""
 
diff --git a/deepmd/pd/model/atomic_model/dp_atomic_model.py b/deepmd/pd/model/atomic_model/dp_atomic_model.py
index 816245c28a..7e102f6456 100644
--- a/deepmd/pd/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pd/model/atomic_model/dp_atomic_model.py
@@ -397,12 +397,35 @@ def wrapped_sampler():
             return sampled
 
         self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
-        self.fitting_net.compute_input_stats(
-            wrapped_sampler, protection=self.data_stat_protect
-        )
+        self.compute_fitting_input_stat(wrapped_sampler, stat_file_path)
         if compute_or_load_out_stat:
             self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
+    def compute_fitting_input_stat(
+        self,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
+        stat_file_path: Optional[DPPath] = None,
+    ) -> None:
+        """Compute the fitting net's input statistics (e.g. mean and stddev) from packed data.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `sample_merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        stat_file_path : Optional[DPPath]
+            Path to the statistics file, passed through to the fitting net unchanged; may be None.
+        """
+        self.fitting_net.compute_input_stats(
+            sample_merged,
+            protection=self.data_stat_protect,
+            stat_file_path=stat_file_path,
+        )
+
     def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this atomic model."""
         return self.fitting_net.get_dim_fparam()
diff --git a/deepmd/pd/model/model/make_model.py b/deepmd/pd/model/model/make_model.py
index 42c406f8d7..dafb15e5cd 100644
--- a/deepmd/pd/model/model/make_model.py
+++ b/deepmd/pd/model/model/make_model.py
@@ -228,6 +228,8 @@ def change_out_bias(
                 merged,
                 bias_adjust_mode=bias_adjust_mode,
             )
+            if bias_adjust_mode == "set-by-statistic":
+                self.atomic_model.compute_fitting_input_stat(merged)
 
         def forward_common_lower(
             self,
diff --git a/deepmd/pd/model/task/fitting.py b/deepmd/pd/model/task/fitting.py
index 398630e1d2..cb0396e0a2 100644
--- a/deepmd/pd/model/task/fitting.py
+++ b/deepmd/pd/model/task/fitting.py
@@ -40,6 +40,9 @@
     get_index_between_two_maps,
     map_atom_exclude_types,
 )
+from deepmd.utils.path import (
+    DPPath,
+)
 
 dtype = env.GLOBAL_PD_FLOAT_PRECISION
 device = env.DEVICE
@@ -76,6 +79,7 @@ def compute_input_stats(
         self,
         merged: Union[Callable[[], list[dict]], list[dict]],
         protection: float = 1e-2,
+        stat_file_path: Optional[DPPath] = None,
     ) -> None:
         """
         Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
@@ -91,6 +95,8 @@ def compute_input_stats(
                 the lazy function helps by only sampling once.
         protection : float
             Divided-by-zero protection
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
         """
         if self.numb_fparam == 0 and self.numb_aparam == 0:
             # skip data statistics
diff --git a/deepmd/pd/utils/auto_batch_size.py b/deepmd/pd/utils/auto_batch_size.py
index 0431fb80ae..f45746ed95 100644
--- a/deepmd/pd/utils/auto_batch_size.py
+++ b/deepmd/pd/utils/auto_batch_size.py
@@ -36,7 +36,7 @@ def is_gpu_available(self) -> bool:
         bool
             True if GPU is available
         """
-        return paddle.device.cuda.device_count() > 0
+        return paddle.device.device_count() > 0
 
     def is_oom_error(self, e: Exception) -> bool:
         """Check if the exception is an OOM error.
@@ -51,6 +51,6 @@ def is_oom_error(self, e: Exception) -> bool:
         # (the meaningless error message should be considered as a bug in cusolver)
         if isinstance(e, MemoryError) and ("ResourceExhaustedError" in e.args[0]):
             # Release all unoccupied cached memory
-            paddle.device.cuda.empty_cache()
+            paddle.device.empty_cache()
             return True
         return False
diff --git a/deepmd/pd/utils/env.py b/deepmd/pd/utils/env.py
index 28606d0945..8715b59f54 100644
--- a/deepmd/pd/utils/env.py
+++ b/deepmd/pd/utils/env.py
@@ -29,10 +29,10 @@
 # Make sure DDP uses correct device if applicable
 LOCAL_RANK = int(os.environ.get("PADDLE_LOCAL_RANK", 0))
 
-if os.environ.get("DEVICE") == "cpu" or paddle.device.cuda.device_count() <= 0:
+if os.environ.get("DEVICE") == "cpu" or paddle.device.device_count() <= 0:
     DEVICE = "cpu"
 else:
-    DEVICE = f"gpu:{LOCAL_RANK}"
+    DEVICE = paddle.device.get_device()
 
 paddle.device.set_device(DEVICE)
 
diff --git a/deepmd/pd/utils/utils.py b/deepmd/pd/utils/utils.py
index 175ac5019b..7224547805 100644
--- a/deepmd/pd/utils/utils.py
+++ b/deepmd/pd/utils/utils.py
@@ -3,6 +3,7 @@
     annotations,
 )
 
+import warnings
 from contextlib import (
     contextmanager,
 )
@@ -345,8 +346,21 @@ def get_generator(
             generator = paddle.framework.core.default_cuda_generator(
                 int(DEVICE.split("gpu:")[1])
             )
+        elif DEVICE == "xpu":
+            generator = paddle.framework.core.default_xpu_generator(0)
+        elif DEVICE.startswith("xpu:"):
+            generator = paddle.framework.core.default_xpu_generator(
+                int(DEVICE.split("xpu:")[1])
+            )
         else:
-            raise ValueError("DEVICE should be cpu or gpu or gpu:x")
+            # return None for compatibility across different devices
+            warnings.warn(
+                f"DEVICE is {DEVICE}, which is not supported. Returning None.",
+                category=UserWarning,
+                stacklevel=2,
+            )
+            return None
+            # raise ValueError("DEVICE should be cpu or gpu or gpu:x or xpu or xpu:x")
         generator.manual_seed(seed)
         return generator
     else:
diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py
index f3e52cdac0..d4800bb4ca 100644
--- a/deepmd/pt/infer/deep_eval.py
+++ b/deepmd/pt/infer/deep_eval.py
@@ -248,7 +248,7 @@ def model_type(self) -> type["DeepEvalWrapper"]:
             return DeepDOS
         elif "dipole" in model_output_type:
             return DeepDipole
-        elif "polar" in model_output_type:
+        elif "polar" in model_output_type or "polarizability" in model_output_type:
             return DeepPolar
         elif "global_polar" in model_output_type:
             return DeepGlobalPolar
@@ -397,9 +397,9 @@ def _get_request_defs(self, atomic: bool) -> list[OutputVariableDef]:
             The requested output definitions.
         """
         if atomic:
-            return list(self.output_def.var_defs.values())
+            output_defs = list(self.output_def.var_defs.values())
         else:
-            return [
+            output_defs = [
                 x
                 for x in self.output_def.var_defs.values()
                 if x.category
@@ -411,6 +411,13 @@ def _get_request_defs(self, atomic: bool) -> list[OutputVariableDef]:
                     OutputVariableCategory.DERV_R_DERV_R,
                 )
             ]
+        if not self.get_has_hessian():
+            output_defs = [
+                x
+                for x in output_defs
+                if x.category != OutputVariableCategory.DERV_R_DERV_R
+            ]
+        return output_defs
 
     def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable:
         """Wrapper method with auto batch size.
diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py
index b8ba0a1981..6b786a796f 100644
--- a/deepmd/pt/model/atomic_model/base_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/base_atomic_model.py
@@ -493,6 +493,24 @@ def change_out_bias(
         else:
             raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode)
 
+    def compute_fitting_input_stat(
+        self,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
+    ) -> None:
+        """Compute the input statistics (e.g. mean and stddev) for the atomic model from packed data.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        """
+        pass
+
     def _get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]:
         """Get a forward wrapper of the atomic model for output bias calculation."""
 
diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index 5b7d96560f..fdff461efa 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -5,6 +5,7 @@
     Any,
     Callable,
     Optional,
+    Union,
 )
 
 import torch
@@ -325,15 +326,47 @@ def wrapped_sampler() -> list[dict]:
                 atom_exclude_types = self.atom_excl.get_exclude_types()
                 for sample in sampled:
                     sample["atom_exclude_types"] = list(atom_exclude_types)
+            if (
+                "find_fparam" not in sampled[0]
+                and "fparam" not in sampled[0]
+                and self.has_default_fparam()
+            ):
+                default_fparam = self.get_default_fparam()
+                for sample in sampled:
+                    nframe = sample["atype"].shape[0]
+                    sample["fparam"] = default_fparam.repeat(nframe, 1)
             return sampled
 
         self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
-        self.fitting_net.compute_input_stats(
-            wrapped_sampler, protection=self.data_stat_protect
-        )
+        self.compute_fitting_input_stat(wrapped_sampler, stat_file_path)
         if compute_or_load_out_stat:
             self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
+    def compute_fitting_input_stat(
+        self,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
+        stat_file_path: Optional[DPPath] = None,
+    ) -> None:
+        """Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        stat_file_path : Optional[DPPath]
+            The path to the statistics file.
+        """
+        self.fitting_net.compute_input_stats(
+            sample_merged,
+            protection=self.data_stat_protect,
+            stat_file_path=stat_file_path,
+        )
+
     def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this atomic model."""
         return self.fitting_net.get_dim_fparam()
@@ -342,6 +375,9 @@ def has_default_fparam(self) -> bool:
         """Check if the model has default frame parameters."""
         return self.fitting_net.has_default_fparam()
 
+    def get_default_fparam(self) -> Optional[torch.Tensor]:
+        return self.fitting_net.get_default_fparam()
+
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.fitting_net.get_dim_aparam()
diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py
index e158dd3725..7f600ccc2e 100644
--- a/deepmd/pt/model/descriptor/dpa1.py
+++ b/deepmd/pt/model/descriptor/dpa1.py
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import warnings
 from typing import (
     Any,
     Callable,
@@ -304,7 +305,8 @@ def __init__(
         self.use_econf_tebd = use_econf_tebd
         self.use_tebd_bias = use_tebd_bias
         self.type_map = type_map
-        self.compress = False
+        self.tebd_compress = False
+        self.geo_compress = False
         self.type_embedding = TypeEmbedNet(
             ntypes,
             tebd_dim,
@@ -592,12 +594,17 @@ def enable_compression(
         check_frequency
             The overflow check frequency
         """
-        # do some checks before the mocel compression process
-        if self.compress:
+        # do some checks before the model compression process
+        if self.tebd_compress or self.geo_compress:
             raise ValueError("Compression is already enabled.")
+
+        if self.tebd_input_mode != "strip":
+            raise RuntimeError("Type embedding compression only works in strip mode")
+
         assert not self.se_atten.resnet_dt, (
             "Model compression error: descriptor resnet_dt must be false!"
         )
+
         for tt in self.se_atten.exclude_types:
             if (tt[0] not in range(self.se_atten.ntypes)) or (
                 tt[1] not in range(self.se_atten.ntypes)
@@ -609,6 +616,7 @@ def enable_compression(
                     + str(self.se_atten.ntypes)
                     + "!"
                 )
+
         if (
             self.se_atten.ntypes * self.se_atten.ntypes
             - len(self.se_atten.exclude_types)
@@ -618,34 +626,38 @@ def enable_compression(
                 "Empty embedding-nets are not supported in model compression!"
             )
 
-        if self.se_atten.attn_layer != 0:
-            raise RuntimeError("Cannot compress model when attention layer is not 0.")
-
-        if self.tebd_input_mode != "strip":
-            raise RuntimeError("Cannot compress model when tebd_input_mode == 'concat'")
-
-        data = self.serialize()
-        self.table = DPTabulate(
-            self,
-            data["neuron"],
-            data["type_one_side"],
-            data["exclude_types"],
-            ActivationFn(data["activation_function"]),
-        )
-        self.table_config = [
-            table_extrapolate,
-            table_stride_1,
-            table_stride_2,
-            check_frequency,
-        ]
-        self.lower, self.upper = self.table.build(
-            min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2
-        )
+        # Enable type embedding compression
+        self.se_atten.type_embedding_compression(self.type_embedding)
+        self.tebd_compress = True
+
+        if self.se_atten.attn_layer == 0:
+            data = self.serialize()
+            self.table = DPTabulate(
+                self,
+                data["neuron"],
+                data["type_one_side"],
+                data["exclude_types"],
+                ActivationFn(data["activation_function"]),
+            )
+            self.table_config = [
+                table_extrapolate,
+                table_stride_1,
+                table_stride_2,
+                check_frequency,
+            ]
+            self.lower, self.upper = self.table.build(
+                min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2
+            )
 
-        self.se_atten.enable_compression(
-            self.table.data, self.table_config, self.lower, self.upper
-        )
-        self.compress = True
+            self.se_atten.enable_compression(
+                self.table.data, self.table_config, self.lower, self.upper
+            )
+            self.geo_compress = True
+        else:
+            warnings.warn(
+                "Attention layer is not 0, only type embedding is compressed. Geometric part is not compressed.",
+                UserWarning,
+            )
 
     def forward(
         self,
diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py
index 5858206cc3..583f18f2be 100644
--- a/deepmd/pt/model/descriptor/dpa2.py
+++ b/deepmd/pt/model/descriptor/dpa2.py
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import warnings
 from typing import (
     Any,
     Callable,
@@ -938,36 +939,41 @@ def enable_compression(
                 "Repinit empty embedding-nets are not supported in model compression!"
             )
 
-        if self.repinit.attn_layer != 0:
+        if self.repinit.tebd_input_mode != "strip":
             raise RuntimeError(
-                "Cannot compress model when repinit attention layer is not 0."
+                "Cannot compress model when repinit tebd_input_mode != 'strip'"
             )
 
-        if self.repinit.tebd_input_mode != "strip":
-            raise RuntimeError(
-                "Cannot compress model when repinit tebd_input_mode == 'concat'"
+        if self.repinit.attn_layer == 0:
+            # repinit doesn't have a serialize method
+            data = self.serialize()
+            self.table = DPTabulate(
+                self,
+                data["repinit_args"]["neuron"],
+                data["repinit_args"]["type_one_side"],
+                data["exclude_types"],
+                ActivationFn(data["repinit_args"]["activation_function"]),
+            )
+            self.table_config = [
+                table_extrapolate,
+                table_stride_1,
+                table_stride_2,
+                check_frequency,
+            ]
+            self.lower, self.upper = self.table.build(
+                min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2
             )
 
-        # repinit doesn't have a serialize method
-        data = self.serialize()
-        self.table = DPTabulate(
-            self,
-            data["repinit_args"]["neuron"],
-            data["repinit_args"]["type_one_side"],
-            data["exclude_types"],
-            ActivationFn(data["repinit_args"]["activation_function"]),
-        )
-        self.table_config = [
-            table_extrapolate,
-            table_stride_1,
-            table_stride_2,
-            check_frequency,
-        ]
-        self.lower, self.upper = self.table.build(
-            min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2
-        )
+            self.repinit.enable_compression(
+                self.table.data, self.table_config, self.lower, self.upper
+            )
+        else:
+            warnings.warn(
+                "Attention layer is not 0, only type embedding is compressed. Geometric part is not compressed.",
+                UserWarning,
+            )
+
+        # Enable type embedding compression for repinit (se_atten)
+        self.repinit.type_embedding_compression(self.type_embedding)
 
-        self.repinit.enable_compression(
-            self.table.data, self.table_config, self.lower, self.upper
-        )
         self.compress = True
diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py
index bfcb510810..7c1de6146a 100644
--- a/deepmd/pt/model/descriptor/se_atten.py
+++ b/deepmd/pt/model/descriptor/se_atten.py
@@ -27,6 +27,9 @@
     MLPLayer,
     NetworkCollection,
 )
+from deepmd.pt.model.network.network import (
+    TypeEmbedNet,
+)
 from deepmd.pt.utils import (
     env,
 )
@@ -272,15 +275,20 @@ def __init__(
             self.filter_layers_strip = filter_layers_strip
         self.stats = None
 
-        # add for compression
-        self.compress = False
+        self.tebd_compress = False
+        self.geo_compress = False
         self.is_sorted = False
+        # For geometric compression
         self.compress_info = nn.ParameterList(
             [nn.Parameter(torch.zeros(0, dtype=self.prec, device="cpu"))]
         )
         self.compress_data = nn.ParameterList(
             [nn.Parameter(torch.zeros(0, dtype=self.prec, device=env.DEVICE))]
         )
+        # For type embedding compression
+        self.register_buffer(
+            "type_embd_data", torch.zeros(0, dtype=self.prec, device=env.DEVICE)
+        )
 
     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
@@ -445,7 +453,59 @@ def enable_compression(
             device="cpu",
         )
         self.compress_data[0] = table_data[net].to(device=env.DEVICE, dtype=self.prec)
-        self.compress = True
+        self.geo_compress = True
+
+    def type_embedding_compression(self, type_embedding_net: TypeEmbedNet) -> None:
+        """Enable type embedding compression for strip mode.
+
+        Precomputes embedding network outputs for all type combinations:
+        - One-side: (ntypes+1) combinations (neighbor types only)
+        - Two-side: (ntypes+1)² combinations (neighbor x center type pairs)
+
+        Parameters
+        ----------
+        type_embedding_net : TypeEmbedNet
+            The type embedding network that provides get_full_embedding() method
+        """
+        if self.tebd_input_mode != "strip":
+            raise RuntimeError("Type embedding compression only works in strip mode")
+        if self.filter_layers_strip is None:
+            raise RuntimeError(
+                "filter_layers_strip must be initialized for type embedding compression"
+            )
+
+        with torch.no_grad():
+            # Get full type embedding: (ntypes+1) x tebd_dim
+            full_embd = type_embedding_net.get_full_embedding(env.DEVICE)
+            nt, t_dim = full_embd.shape
+
+            if self.type_one_side:
+                # One-side: only neighbor types, much simpler!
+                # Precompute for all (ntypes+1) neighbor types
+                embd_tensor = self.filter_layers_strip.networks[0](full_embd).detach()
+                if hasattr(self, "type_embd_data"):
+                    del self.type_embd_data
+                self.register_buffer("type_embd_data", embd_tensor)
+            else:
+                # Two-side: all (ntypes+1)² type pair combinations
+                # Create [neighbor, center] combinations
+                # for a fixed row i, all columns j have different neighbor types
+                embd_nei = full_embd.view(1, nt, t_dim).expand(nt, nt, t_dim)
+                # for a fixed row i, all columns j share the same center type i
+                embd_center = full_embd.view(nt, 1, t_dim).expand(nt, nt, t_dim)
+                two_side_embd = torch.cat([embd_nei, embd_center], dim=-1).reshape(
+                    -1, t_dim * 2
+                )
+                # Precompute for all type pairs
+                # Index formula: idx = center_type * nt + neighbor_type
+                embd_tensor = self.filter_layers_strip.networks[0](
+                    two_side_embd
+                ).detach()
+                if hasattr(self, "type_embd_data"):
+                    del self.type_embd_data
+                self.register_buffer("type_embd_data", embd_tensor)
+
+        self.tebd_compress = True
 
     def forward(
         self,
@@ -572,47 +632,49 @@ def forward(
             nlist_index = nlist.reshape(nb, nloc * nnei)
             # nf x (nl x nnei)
             nei_type = torch.gather(extended_atype, dim=1, index=nlist_index)
-            # (nf x nl x nnei) x ng
-            nei_type_index = nei_type.view(-1, 1).expand(-1, ng).type(torch.long)
             if self.type_one_side:
-                tt_full = self.filter_layers_strip.networks[0](type_embedding)
-                # (nf x nl x nnei) x ng
-                gg_t = torch.gather(tt_full, dim=0, index=nei_type_index)
+                if self.tebd_compress:
+                    tt_full = self.type_embd_data
+                else:
+                    # (ntypes+1, tebd_dim) -> (ntypes+1, ng)
+                    tt_full = self.filter_layers_strip.networks[0](type_embedding)
+                # (nf*nl*nnei,) -> (nf*nl*nnei, ng)
+                gg_t = tt_full[nei_type.view(-1).type(torch.long)]
             else:
                 idx_i = torch.tile(
                     atype.reshape(-1, 1) * ntypes_with_padding, [1, nnei]
                 ).view(-1)
                 idx_j = nei_type.view(-1)
+                # (nf x nl x nnei)
+                idx = (idx_i + idx_j).to(torch.long)
+                if self.tebd_compress:
+                    # ((ntypes+1)^2, ng)
+                    tt_full = self.type_embd_data
+                else:
+                    # (ntypes+1) x (ntypes+1) x nt
+                    type_embedding_nei = torch.tile(
+                        type_embedding.view(1, ntypes_with_padding, nt),
+                        [ntypes_with_padding, 1, 1],
+                    )
+                    # (ntypes+1) x (ntypes+1) x nt
+                    type_embedding_center = torch.tile(
+                        type_embedding.view(ntypes_with_padding, 1, nt),
+                        [1, ntypes_with_padding, 1],
+                    )
+                    # (ntypes+1)^2 x (nt+nt)
+                    two_side_type_embedding = torch.cat(
+                        [type_embedding_nei, type_embedding_center], -1
+                    ).reshape(-1, nt * 2)
+                    tt_full = self.filter_layers_strip.networks[0](
+                        two_side_type_embedding
+                    )
                 # (nf x nl x nnei) x ng
-                idx = (
-                    (idx_i + idx_j)
-                    .view(-1, 1)
-                    .expand(-1, ng)
-                    .type(torch.long)
-                    .to(torch.long)
-                )
-                # (ntypes) * ntypes * nt
-                type_embedding_nei = torch.tile(
-                    type_embedding.view(1, ntypes_with_padding, nt),
-                    [ntypes_with_padding, 1, 1],
-                )
-                # ntypes * (ntypes) * nt
-                type_embedding_center = torch.tile(
-                    type_embedding.view(ntypes_with_padding, 1, nt),
-                    [1, ntypes_with_padding, 1],
-                )
-                # (ntypes * ntypes) * (nt+nt)
-                two_side_type_embedding = torch.cat(
-                    [type_embedding_nei, type_embedding_center], -1
-                ).reshape(-1, nt * 2)
-                tt_full = self.filter_layers_strip.networks[0](two_side_type_embedding)
-                # (nf x nl x nnei) x ng
-                gg_t = torch.gather(tt_full, dim=0, index=idx)
+                gg_t = tt_full[idx]
             # (nf x nl) x nnei x ng
             gg_t = gg_t.reshape(nfnl, nnei, ng)
             if self.smooth:
                 gg_t = gg_t * sw.reshape(-1, self.nnei, 1)
-            if self.compress:
+            if self.geo_compress:
                 ss = ss.reshape(-1, 1)
                 gg_t = gg_t.reshape(-1, gg_t.size(-1))
                 xyz_scatter = torch.ops.deepmd.tabulate_fusion_se_atten(
@@ -660,7 +722,7 @@ def forward(
         return (
             result.view(nframes, nloc, self.filter_neuron[-1] * self.axis_neuron),
             gg.view(nframes, nloc, self.nnei, self.filter_neuron[-1])
-            if not self.compress
+            if not self.geo_compress
             else None,
             dmatrix.view(nframes, nloc, self.nnei, 4)[..., 1:],
             rot_mat.view(nframes, nloc, self.filter_neuron[-1], 3),
diff --git a/deepmd/pt/model/descriptor/se_t_tebd.py b/deepmd/pt/model/descriptor/se_t_tebd.py
index f7de1c3015..8225d8e4af 100644
--- a/deepmd/pt/model/descriptor/se_t_tebd.py
+++ b/deepmd/pt/model/descriptor/se_t_tebd.py
@@ -7,6 +7,7 @@
 )
 
 import torch
+import torch.nn as nn
 
 from deepmd.dpmodel.utils import EnvMat as DPEnvMat
 from deepmd.dpmodel.utils.seed import (
@@ -39,9 +40,15 @@
 from deepmd.pt.utils.exclude_mask import (
     PairExcludeMask,
 )
+from deepmd.pt.utils.tabulate import (
+    DPTabulate,
+)
 from deepmd.pt.utils.update_sel import (
     UpdateSel,
 )
+from deepmd.pt.utils.utils import (
+    ActivationFn,
+)
 from deepmd.utils.data_system import (
     DeepmdDataSystem,
 )
@@ -181,6 +188,7 @@ def __init__(
         self.tebd_input_mode = tebd_input_mode
         self.concat_output_tebd = concat_output_tebd
         self.trainable = trainable
+        self.compress = False
         # set trainable
         for param in self.parameters():
             param.requires_grad = trainable
@@ -516,6 +524,86 @@ def update_sel(
         local_jdata_cpy["sel"] = sel[0]
         return local_jdata_cpy, min_nbor_dist
 
+    def enable_compression(
+        self,
+        min_nbor_dist: float,
+        table_extrapolate: float = 5,
+        table_stride_1: float = 0.01,
+        table_stride_2: float = 0.1,
+        check_frequency: int = -1,
+    ) -> None:
+        """Receive the statistics (distance, max_nbor_size and env_mat_range) of the training data.
+
+        Parameters
+        ----------
+        min_nbor_dist
+            The nearest distance between atoms
+        table_extrapolate
+            The scale of model extrapolation
+        table_stride_1
+            The uniform stride of the first table
+        table_stride_2
+            The uniform stride of the second table
+        check_frequency
+            The overflow check frequency
+        """
+        # do some checks before the model compression process
+        if self.compress:
+            raise ValueError("Compression is already enabled.")
+        assert not self.se_ttebd.resnet_dt, (
+            "Model compression error: descriptor resnet_dt must be false!"
+        )
+        if self.tebd_input_mode != "strip":
+            raise RuntimeError("Cannot compress model when tebd_input_mode != 'strip'")
+        for tt in self.se_ttebd.exclude_types:
+            if (tt[0] not in range(self.se_ttebd.ntypes)) or (
+                tt[1] not in range(self.se_ttebd.ntypes)
+            ):
+                raise RuntimeError(
+                    "exclude types"
+                    + str(tt)
+                    + " must within the number of atomic types "
+                    + str(self.se_ttebd.ntypes)
+                    + "!"
+                )
+        if (
+            self.se_ttebd.ntypes * self.se_ttebd.ntypes
+            - len(self.se_ttebd.exclude_types)
+            == 0
+        ):
+            raise RuntimeError(
+                "Empty embedding-nets are not supported in model compression!"
+            )
+
+        data = self.serialize()
+        self.table = DPTabulate(
+            self,
+            data["neuron"],
+            exclude_types=data["exclude_types"],
+            activation_fn=ActivationFn(data["activation_function"]),
+        )
+        # Scale the stride values for SE_T descriptor
+        stride_1_scaled = table_stride_1 * 10
+        stride_2_scaled = table_stride_2 * 10
+        self.table_config = [
+            table_extrapolate,
+            stride_1_scaled,
+            stride_2_scaled,
+            check_frequency,
+        ]
+        self.lower, self.upper = self.table.build(
+            min_nbor_dist, table_extrapolate, stride_1_scaled, stride_2_scaled
+        )
+
+        self.se_ttebd.enable_compression(
+            self.type_embedding,
+            self.table.data,
+            self.table_config,
+            self.lower,
+            self.upper,
+        )
+        self.compress = True
+
 
 @DescriptorBlock.register("se_ttebd")
 class DescrptBlockSeTTebd(DescriptorBlock):
@@ -607,6 +695,19 @@ def __init__(
             )
             self.filter_layers_strip = filter_layers_strip
         self.stats = None
+        # compression related variables
+        self.compress = False
+        # For geometric compression
+        self.compress_info = nn.ParameterList(
+            [nn.Parameter(torch.zeros(0, dtype=self.prec, device="cpu"))]
+        )
+        self.compress_data = nn.ParameterList(
+            [nn.Parameter(torch.zeros(0, dtype=self.prec, device=env.DEVICE))]
+        )
+        # For type embedding compression
+        self.register_buffer(
+            "type_embd_data", torch.zeros(0, dtype=self.prec, device=env.DEVICE)
+        )
 
     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
@@ -811,6 +912,7 @@ def forward(
             self.rcut_smth,
             protection=self.env_protection,
         )
+        # dmatrix: [1/r, dx/r^2, dy/r^2, dz/r^2], sw: distance weighting
         # nb x nloc x nnei
         exclude_mask = self.emask(nlist, extended_atype)
         nlist = torch.where(exclude_mask != 0, nlist, -1)
@@ -831,11 +933,13 @@ def forward(
         rr = dmatrix
         rr = rr * exclude_mask[:, :, None]
 
-        # nfnl x nt_i x 3
+        # nfnl x nt_i x 3: direction vectors
+        # nt_i = nnei
+        # nt_j = nnei
         rr_i = rr[:, :, 1:]
         # nfnl x nt_j x 3
         rr_j = rr[:, :, 1:]
-        # nfnl x nt_i x nt_j
+        # nfnl x nt_i x nt_j: three-body angular correlations (cos theta_ij)
         env_ij = torch.einsum("ijm,ikm->ijk", rr_i, rr_j)
         # nfnl x nt_i x nt_j x 1
         ss = env_ij.unsqueeze(-1)
@@ -857,8 +961,24 @@ def forward(
             # nfnl x nt_i x nt_j x ng
             gg = self.filter_layers.networks[0](ss)
         elif self.tebd_input_mode in ["strip"]:
-            # nfnl x nt_i x nt_j x ng
-            gg_s = self.filter_layers.networks[0](ss)
+            if self.compress:
+                # Tabulated geometric embedding from angular features
+                # using SE_T_TEBD specific function
+                ebd_env_ij = env_ij.view(-1, 1)
+                gg_s = torch.ops.deepmd.tabulate_fusion_se_t_tebd(
+                    self.compress_data[0].contiguous(),
+                    self.compress_info[0].cpu().contiguous(),
+                    ebd_env_ij.contiguous(),  # em_x: (nfnl * nt_i * nt_j, 1)
+                    env_ij.contiguous(),  # em: (nfnl, nt_i, nt_j)
+                    self.filter_neuron[-1],
+                )[0]
+                # SE_T_TEBD tabulation preserves the full neighbor structure
+                # nfnl x nt_i x nt_j x ng
+                gg_s = gg_s.view(nfnl, nnei, nnei, self.filter_neuron[-1])
+            else:
+                # nfnl x nt_i x nt_j x ng
+                gg_s = self.filter_layers.networks[0](ss)
+
             assert self.filter_layers_strip is not None
             assert type_embedding is not None
             ng = self.filter_neuron[-1]
@@ -874,44 +994,40 @@ def forward(
             nei_type_j = nei_type.unsqueeze(1).expand([-1, nnei, -1])
             idx_i = nei_type_i * ntypes_with_padding
             idx_j = nei_type_j
-            # (nf x nl x nt_i x nt_j) x ng
-            idx = (
-                (idx_i + idx_j)
-                .view(-1, 1)
-                .expand(-1, ng)
-                .type(torch.long)
-                .to(torch.long)
-            )
-            # ntypes * (ntypes) * nt
-            type_embedding_i = torch.tile(
-                type_embedding.view(ntypes_with_padding, 1, nt),
-                [1, ntypes_with_padding, 1],
-            )
-            # (ntypes) * ntypes * nt
-            type_embedding_j = torch.tile(
-                type_embedding.view(1, ntypes_with_padding, nt),
-                [ntypes_with_padding, 1, 1],
-            )
-            # (ntypes * ntypes) * (nt+nt)
-            two_side_type_embedding = torch.cat(
-                [type_embedding_i, type_embedding_j], -1
-            ).reshape(-1, nt * 2)
-            tt_full = self.filter_layers_strip.networks[0](two_side_type_embedding)
+            idx = (idx_i + idx_j).reshape(-1).to(torch.long)
+            if self.compress:
+                tt_full = self.type_embd_data
+            else:
+                type_embedding_i = torch.tile(
+                    type_embedding.view(ntypes_with_padding, 1, nt),
+                    [1, ntypes_with_padding, 1],
+                )
+                type_embedding_j = torch.tile(
+                    type_embedding.view(1, ntypes_with_padding, nt),
+                    [ntypes_with_padding, 1, 1],
+                )
+                two_side_type_embedding = torch.cat(
+                    [type_embedding_i, type_embedding_j], -1
+                ).reshape(-1, nt * 2)
+                tt_full = self.filter_layers_strip.networks[0](two_side_type_embedding)
             # (nfnl x nt_i x nt_j) x ng
-            gg_t = torch.gather(tt_full, dim=0, index=idx)
+            gg_t = tt_full[idx]
             # (nfnl x nt_i x nt_j) x ng
             gg_t = gg_t.reshape(nfnl, nnei, nnei, ng)
             if self.smooth:
+                # Apply distance weighting to type features
                 gg_t = (
                     gg_t
                     * sw.reshape(nfnl, self.nnei, 1, 1)
                     * sw.reshape(nfnl, 1, self.nnei, 1)
                 )
+            # Combine geometric and type embeddings: gg_s * (1 + gg_t)
             # nfnl x nt_i x nt_j x ng
             gg = gg_s * gg_t + gg_s
         else:
             raise NotImplementedError
 
+        # Contract angular correlations with learned features
         # nfnl x ng
         res_ij = torch.einsum("ijk,ijkm->im", env_ij, gg)
         res_ij = res_ij * (1.0 / float(self.nnei) / float(self.nnei))
@@ -925,6 +1041,72 @@ def forward(
             sw,
         )
 
+    def enable_compression(
+        self,
+        type_embedding_net: TypeEmbedNet,
+        table_data: dict,
+        table_config: dict,
+        lower: dict,
+        upper: dict,
+    ) -> None:
+        """Enable compression for the SE_T_TEBD block (strip mode only).
+
+        Parameters
+        ----------
+        type_embedding_net : TypeEmbedNet
+            Network whose full embedding is tabulated for every type pair
+        table_data : dict
+            Tabulated data; the "filter_net" entry is moved to env.DEVICE
+        table_config : dict
+            Indexed at 0..3; entry 0 scales the upper bound (TODO confirm type)
+        lower : dict
+            Lower bounds, looked up under "filter_net"
+        upper : dict
+            Upper bounds, looked up under "filter_net"
+        """
+        if self.tebd_input_mode != "strip":
+            raise RuntimeError("Type embedding compression only works in strip mode")
+        if self.filter_layers_strip is None:
+            raise RuntimeError(
+                "filter_layers_strip must exist for type embedding compression"
+            )
+
+        # Pack bounds and table_config entries, then keep the "filter_net" table
+        net_key = "filter_net"
+        self.compress_info[0] = torch.as_tensor(
+            [
+                lower[net_key],
+                upper[net_key],
+                upper[net_key] * table_config[0],
+                table_config[1],
+                table_config[2],
+                table_config[3],
+            ],
+            dtype=self.prec,
+            device="cpu",
+        )
+        self.compress_data[0] = table_data[net_key].to(
+            device=env.DEVICE, dtype=self.prec
+        )
+
+        # Tabulate the strip net over all nt x nt type pairs; row i * nt + j is (i, j)
+        with torch.no_grad():
+            full_embd = type_embedding_net.get_full_embedding(env.DEVICE)
+            nt, t_dim = full_embd.shape
+            type_embedding_i = full_embd.view(nt, 1, t_dim).expand(nt, nt, t_dim)
+            type_embedding_j = full_embd.view(1, nt, t_dim).expand(nt, nt, t_dim)
+            two_side_type_embedding = torch.cat(
+                [type_embedding_i, type_embedding_j], dim=-1
+            ).reshape(-1, t_dim * 2)
+            embd_tensor = self.filter_layers_strip.networks[0](
+                two_side_type_embedding
+            ).detach()
+            if hasattr(self, "type_embd_data"):
+                del self.type_embd_data
+            self.register_buffer("type_embd_data", embd_tensor)
+
+        self.compress = True
+
     def has_message_passing(self) -> bool:
         """Returns whether the descriptor block has message passing."""
         return False
diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py
index 53d32977b0..3d97c0d4db 100644
--- a/deepmd/pt/model/model/make_model.py
+++ b/deepmd/pt/model/model/make_model.py
@@ -232,6 +232,8 @@ def change_out_bias(
                 merged,
                 bias_adjust_mode=bias_adjust_mode,
             )
+            if bias_adjust_mode == "set-by-statistic":
+                self.atomic_model.compute_fitting_input_stat(merged)
 
         def forward_common_lower(
             self,
@@ -530,6 +532,9 @@ def has_default_fparam(self) -> bool:
             """Check if the model has default frame parameters."""
             return self.atomic_model.has_default_fparam()
 
+        def get_default_fparam(self) -> Optional[torch.Tensor]:
+            return self.atomic_model.get_default_fparam()  # may be None
+
         @torch.jit.export
         def get_dim_aparam(self) -> int:
             """Get the number (dimension) of atomic parameters of this atomic model."""
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 4c8e90ef7c..578e1683ba 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -37,10 +37,16 @@
     to_numpy_array,
     to_torch_tensor,
 )
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
 from deepmd.utils.finetune import (
     get_index_between_two_maps,
     map_atom_exclude_types,
 )
+from deepmd.utils.path import (
+    DPPath,
+)
 
 dtype = env.GLOBAL_PT_FLOAT_PRECISION
 device = env.DEVICE
@@ -57,7 +63,12 @@ def __new__(cls, *args: Any, **kwargs: Any) -> "Fitting":
         return super().__new__(cls)
 
     def share_params(
-        self, base_class: "Fitting", shared_level: int, resume: bool = False
+        self,
+        base_class: "Fitting",
+        shared_level: int,
+        model_prob: float = 1.0,
+        protection: float = 1e-2,
+        resume: bool = False,
     ) -> None:
         """
         Share the parameters of self to the base_class with shared_level during multitask training.
@@ -69,16 +80,164 @@ def share_params(
         )
         if shared_level == 0:
             # only not share the bias_atom_e and the case_embd
+            # link fparam buffers
+            if self.numb_fparam > 0:
+                if not resume:
+                    base_fparam = base_class.stats["fparam"]
+                    assert len(base_fparam) == self.numb_fparam
+                    for ii in range(self.numb_fparam):
+                        base_fparam[ii] += self.get_stats()["fparam"][ii] * model_prob
+                    fparam_avg = np.array([ii.compute_avg() for ii in base_fparam])
+                    fparam_std = np.array(
+                        [ii.compute_std(protection=protection) for ii in base_fparam]
+                    )
+                    fparam_inv_std = 1.0 / fparam_std
+                    base_class.fparam_avg.copy_(
+                        torch.tensor(
+                            fparam_avg,
+                            device=env.DEVICE,
+                            dtype=base_class.fparam_avg.dtype,
+                        )
+                    )
+                    base_class.fparam_inv_std.copy_(
+                        torch.tensor(
+                            fparam_inv_std,
+                            device=env.DEVICE,
+                            dtype=base_class.fparam_inv_std.dtype,
+                        )
+                    )
+                self.fparam_avg = base_class.fparam_avg
+                self.fparam_inv_std = base_class.fparam_inv_std
+
+            # link aparam buffers
+            if self.numb_aparam > 0:
+                if not resume:
+                    base_aparam = base_class.stats["aparam"]
+                    assert len(base_aparam) == self.numb_aparam
+                    for ii in range(self.numb_aparam):
+                        base_aparam[ii] += self.get_stats()["aparam"][ii] * model_prob
+                    aparam_avg = np.array([ii.compute_avg() for ii in base_aparam])
+                    aparam_std = np.array(
+                        [ii.compute_std(protection=protection) for ii in base_aparam]
+                    )
+                    aparam_inv_std = 1.0 / aparam_std
+                    base_class.aparam_avg.copy_(
+                        torch.tensor(
+                            aparam_avg,
+                            device=env.DEVICE,
+                            dtype=base_class.aparam_avg.dtype,
+                        )
+                    )
+                    base_class.aparam_inv_std.copy_(
+                        torch.tensor(
+                            aparam_inv_std,
+                            device=env.DEVICE,
+                            dtype=base_class.aparam_inv_std.dtype,
+                        )
+                    )
+                self.aparam_avg = base_class.aparam_avg
+                self.aparam_inv_std = base_class.aparam_inv_std
             # the following will successfully link all the params except buffers, which need manually link.
             for item in self._modules:
                 self._modules[item] = base_class._modules[item]
         else:
             raise NotImplementedError
 
+    def save_to_file_fparam(self, stat_file_path: DPPath) -> None:
+        """Save the fparam statistics as rows of (number, sum, squared_sum).
+
+        Parameters
+        ----------
+        stat_file_path : DPPath
+            Directory (created if missing) that receives the "fparam" array.
+
+        Raises
+        ------
+        ValueError
+            If the statistics are unset (None) or empty.
+        """
+        assert stat_file_path is not None
+        # Validate first: stats may be None, and a failed call should not mkdir.
+        if not self.stats:
+            raise ValueError("The statistics hasn't been computed.")
+        stat_file_path.mkdir(exist_ok=True, parents=True)
+        fp = stat_file_path / "fparam"
+        # Index explicitly so exactly numb_fparam entries are written.
+        picked = [self.stats["fparam"][ii] for ii in range(self.numb_fparam)]
+        rows = [[st.number, st.sum, st.squared_sum] for st in picked]
+        _fparam_stat = np.array(rows)
+        fp.save_numpy(_fparam_stat)
+        log.info(f"Save fparam stats to {fp}.")
+
+    def save_to_file_aparam(self, stat_file_path: DPPath) -> None:
+        """Save the aparam statistics as rows of (number, sum, squared_sum).
+
+        Parameters
+        ----------
+        stat_file_path : DPPath
+            Directory (created if missing) that receives the "aparam" array.
+
+        Raises
+        ------
+        ValueError
+            If the statistics are unset (None) or empty.
+        """
+        assert stat_file_path is not None
+        # Validate first: stats may be None, and a failed call should not mkdir.
+        if not self.stats:
+            raise ValueError("The statistics hasn't been computed.")
+        stat_file_path.mkdir(exist_ok=True, parents=True)
+        fp = stat_file_path / "aparam"
+        # Index explicitly so exactly numb_aparam entries are written.
+        picked = [self.stats["aparam"][ii] for ii in range(self.numb_aparam)]
+        rows = [[st.number, st.sum, st.squared_sum] for st in picked]
+        _aparam_stat = np.array(rows)
+        fp.save_numpy(_aparam_stat)
+        log.info(f"Save aparam stats to {fp}.")
+
+    def restore_fparam_from_file(self, stat_file_path: DPPath) -> None:
+        """Rebuild the per-component fparam statistics from a saved array.
+
+        Parameters
+        ----------
+        stat_file_path : DPPath
+            Directory that holds the "fparam" array; one row per component,
+            columns are (number, sum, squared_sum).
+        """
+        stat_file = stat_file_path / "fparam"
+        tbl = stat_file.load_numpy()
+        assert tbl.shape == (self.numb_fparam, 3)
+        # One StatItem per fparam component, in row order.
+        self.stats["fparam"] = [
+            StatItem(number=tbl[k][0], sum=tbl[k][1], squared_sum=tbl[k][2])
+            for k in range(self.numb_fparam)
+        ]
+        log.info(f"Load fparam stats from {stat_file}.")
+
+    def restore_aparam_from_file(self, stat_file_path: DPPath) -> None:
+        """Rebuild the per-component aparam statistics from a saved array.
+
+        Parameters
+        ----------
+        stat_file_path : DPPath
+            Directory that holds the "aparam" array; one row per component,
+            columns are (number, sum, squared_sum).
+        """
+        stat_file = stat_file_path / "aparam"
+        tbl = stat_file.load_numpy()
+        assert tbl.shape == (self.numb_aparam, 3)
+        # One StatItem per aparam component, in row order.
+        self.stats["aparam"] = [
+            StatItem(number=tbl[k][0], sum=tbl[k][1], squared_sum=tbl[k][2])
+            for k in range(self.numb_aparam)
+        ]
+        log.info(f"Load aparam stats from {stat_file}.")
+
     def compute_input_stats(
         self,
         merged: Union[Callable[[], list[dict]], list[dict]],
         protection: float = 1e-2,
+        stat_file_path: Optional[DPPath] = None,
     ) -> None:
         """
         Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
@@ -94,67 +253,101 @@ def compute_input_stats(
                 the lazy function helps by only sampling once.
         protection : float
             Divided-by-zero protection
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
         """
         if self.numb_fparam == 0 and self.numb_aparam == 0:
             # skip data statistics
+            self.stats = None
             return
-        if callable(merged):
-            sampled = merged()
-        else:
-            sampled = merged
+
+        self.stats = {}
+
         # stat fparam
         if self.numb_fparam > 0:
-            cat_data = torch.cat([frame["fparam"] for frame in sampled], dim=0)
-            cat_data = torch.reshape(cat_data, [-1, self.numb_fparam])
-            fparam_avg = torch.mean(cat_data, dim=0)
-            fparam_std = torch.std(cat_data, dim=0, unbiased=False)
-            fparam_std = torch.where(
-                fparam_std < protection,
-                torch.tensor(
-                    protection, dtype=fparam_std.dtype, device=fparam_std.device
-                ),
-                fparam_std,
-            )
-            fparam_inv_std = 1.0 / fparam_std
-            self.fparam_avg.copy_(
-                torch.tensor(fparam_avg, device=env.DEVICE, dtype=self.fparam_avg.dtype)
-            )
-            self.fparam_inv_std.copy_(
-                torch.tensor(
-                    fparam_inv_std, device=env.DEVICE, dtype=self.fparam_inv_std.dtype
+            if (
+                stat_file_path is not None
+                and stat_file_path.is_dir()
+                and (stat_file_path / "fparam").is_file()
+            ):
+                self.restore_fparam_from_file(stat_file_path)
+            else:
+                sampled = merged() if callable(merged) else merged
+                self.stats["fparam"] = []
+                cat_data = to_numpy_array(
+                    torch.cat([frame["fparam"] for frame in sampled], dim=0)
                 )
+                cat_data = np.reshape(cat_data, [-1, self.numb_fparam])
+                sumv = np.sum(cat_data, axis=0)
+                sumv2 = np.sum(cat_data * cat_data, axis=0)
+                sumn = cat_data.shape[0]
+                for ii in range(self.numb_fparam):
+                    self.stats["fparam"].append(
+                        StatItem(
+                            number=sumn,
+                            sum=sumv[ii],
+                            squared_sum=sumv2[ii],
+                        )
+                    )
+                if stat_file_path is not None:
+                    self.save_to_file_fparam(stat_file_path)
+
+            fparam_avg = np.array([ii.compute_avg() for ii in self.stats["fparam"]])
+            fparam_std = np.array(
+                [ii.compute_std(protection=protection) for ii in self.stats["fparam"]]
             )
+            fparam_inv_std = 1.0 / fparam_std
+            log.info(f"fparam_avg is {fparam_avg}, fparam_inv_std is {fparam_inv_std}")
+            self.fparam_avg.copy_(to_torch_tensor(fparam_avg))
+            self.fparam_inv_std.copy_(to_torch_tensor(fparam_inv_std))
+
         # stat aparam
         if self.numb_aparam > 0:
-            sys_sumv = []
-            sys_sumv2 = []
-            sys_sumn = []
-            for ss_ in [frame["aparam"] for frame in sampled]:
-                ss = torch.reshape(ss_, [-1, self.numb_aparam])
-                sys_sumv.append(torch.sum(ss, dim=0))
-                sys_sumv2.append(torch.sum(ss * ss, dim=0))
-                sys_sumn.append(ss.shape[0])
-            sumv = torch.sum(torch.stack(sys_sumv), dim=0)
-            sumv2 = torch.sum(torch.stack(sys_sumv2), dim=0)
-            sumn = sum(sys_sumn)
-            aparam_avg = sumv / sumn
-            aparam_std = torch.sqrt(sumv2 / sumn - (sumv / sumn) ** 2)
-            aparam_std = torch.where(
-                aparam_std < protection,
-                torch.tensor(
-                    protection, dtype=aparam_std.dtype, device=aparam_std.device
-                ),
-                aparam_std,
+            if (
+                stat_file_path is not None
+                and stat_file_path.is_dir()
+                and (stat_file_path / "aparam").is_file()
+            ):
+                self.restore_aparam_from_file(stat_file_path)
+            else:
+                sampled = merged() if callable(merged) else merged
+                self.stats["aparam"] = []
+                sys_sumv = []
+                sys_sumv2 = []
+                sys_sumn = []
+                for ss_ in [frame["aparam"] for frame in sampled]:
+                    ss = np.reshape(to_numpy_array(ss_), [-1, self.numb_aparam])
+                    sys_sumv.append(np.sum(ss, axis=0))
+                    sys_sumv2.append(np.sum(ss * ss, axis=0))
+                    sys_sumn.append(ss.shape[0])
+                sumv = np.sum(np.stack(sys_sumv), axis=0)
+                sumv2 = np.sum(np.stack(sys_sumv2), axis=0)
+                sumn = sum(sys_sumn)
+                for ii in range(self.numb_aparam):
+                    self.stats["aparam"].append(
+                        StatItem(
+                            number=sumn,
+                            sum=sumv[ii],
+                            squared_sum=sumv2[ii],
+                        )
+                    )
+                if stat_file_path is not None:
+                    self.save_to_file_aparam(stat_file_path)
+
+            aparam_avg = np.array([ii.compute_avg() for ii in self.stats["aparam"]])
+            aparam_std = np.array(
+                [ii.compute_std(protection=protection) for ii in self.stats["aparam"]]
             )
             aparam_inv_std = 1.0 / aparam_std
-            self.aparam_avg.copy_(
-                torch.tensor(aparam_avg, device=env.DEVICE, dtype=self.aparam_avg.dtype)
-            )
-            self.aparam_inv_std.copy_(
-                torch.tensor(
-                    aparam_inv_std, device=env.DEVICE, dtype=self.aparam_inv_std.dtype
-                )
-            )
+            log.info(f"aparam_avg is {aparam_avg}, aparam_inv_std is {aparam_inv_std}")
+            self.aparam_avg.copy_(to_torch_tensor(aparam_avg))
+            self.aparam_inv_std.copy_(to_torch_tensor(aparam_inv_std))
+
+    def get_stats(self) -> dict[str, list[StatItem]]:
+        """Return the input statistics gathered for this fitting net."""
+        if self.stats is not None:
+            return self.stats
+        raise RuntimeError("The statistics of fitting net has not been computed.")
 
 
 class GeneralFitting(Fitting):
@@ -447,6 +640,9 @@ def has_default_fparam(self) -> bool:
         """Check if the fitting has default frame parameters."""
         return self.default_fparam is not None
 
+    def get_default_fparam(self) -> Optional[torch.Tensor]:
+        return self.default_fparam_tensor  # may be None per annotation
+
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.numb_aparam
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 52d2888081..d099b8b20b 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -616,11 +616,37 @@ def single_model_finetune(
             frz_model = torch.jit.load(init_frz_model, map_location=DEVICE)
             self.model.load_state_dict(frz_model.state_dict())
 
+        # Get model prob for multi-task
+        if self.multi_task:
+            self.model_prob = np.array([0.0 for key in self.model_keys])
+            if training_params.get("model_prob", None) is not None:
+                model_prob = training_params["model_prob"]
+                for ii, model_key in enumerate(self.model_keys):
+                    if model_key in model_prob:
+                        self.model_prob[ii] += float(model_prob[model_key])
+            else:
+                for ii, model_key in enumerate(self.model_keys):
+                    self.model_prob[ii] += float(len(self.training_data[model_key]))
+            sum_prob = np.sum(self.model_prob)
+            assert sum_prob > 0.0, "Sum of model prob must be larger than 0!"
+            self.model_prob = self.model_prob / sum_prob
+
         # Multi-task share params
         if shared_links is not None:
+            _data_stat_protect = np.array(
+                [
+                    model_params["model_dict"][ii].get("data_stat_protect", 1e-2)
+                    for ii in model_params["model_dict"]
+                ]
+            )
+            assert np.allclose(_data_stat_protect, _data_stat_protect[0]), (
+                "Model key 'data_stat_protect' must be the same in each branch when multitask!"
+            )
             self.wrapper.share_params(
                 shared_links,
                 resume=(resuming and not self.finetune_update_stat) or self.rank != 0,
+                model_key_prob_map=dict(zip(self.model_keys, self.model_prob)),
+                data_stat_protect=_data_stat_protect[0],
             )
 
         if dist.is_available() and dist.is_initialized():
@@ -670,21 +696,6 @@ def warm_up_linear(step: int, warmup_steps: int) -> float:
         else:
             raise ValueError(f"Not supported optimizer type '{self.opt_type}'")
 
-        # Get model prob for multi-task
-        if self.multi_task:
-            self.model_prob = np.array([0.0 for key in self.model_keys])
-            if training_params.get("model_prob", None) is not None:
-                model_prob = training_params["model_prob"]
-                for ii, model_key in enumerate(self.model_keys):
-                    if model_key in model_prob:
-                        self.model_prob[ii] += float(model_prob[model_key])
-            else:
-                for ii, model_key in enumerate(self.model_keys):
-                    self.model_prob[ii] += float(len(self.training_data[model_key]))
-            sum_prob = np.sum(self.model_prob)
-            assert sum_prob > 0.0, "Sum of model prob must be larger than 0!"
-            self.model_prob = self.model_prob / sum_prob
-
         # Tensorboard
         self.enable_tensorboard = training_params.get("tensorboard", False)
         self.tensorboard_log_dir = training_params.get("tensorboard_log_dir", "log")
@@ -1337,12 +1348,18 @@ def print_on_training(
 def get_additional_data_requirement(_model: Any) -> list[DataRequirementItem]:
     additional_data_requirement = []
     if _model.get_dim_fparam() > 0:
+        _fparam_default = (
+            _model.get_default_fparam().cpu().numpy()
+            if _model.has_default_fparam()
+            else 0.0
+        )
         fparam_requirement_items = [
             DataRequirementItem(
                 "fparam",
                 _model.get_dim_fparam(),
                 atomic=False,
                 must=not _model.has_default_fparam(),
+                default=_fparam_default,
             )
         ]
         additional_data_requirement += fparam_requirement_items
diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py
index 392f928b0d..c65787958a 100644
--- a/deepmd/pt/train/wrapper.py
+++ b/deepmd/pt/train/wrapper.py
@@ -60,7 +60,13 @@ def __init__(
                     self.loss[task_key] = loss[task_key]
         self.inference_only = self.loss is None
 
-    def share_params(self, shared_links: dict[str, Any], resume: bool = False) -> None:
+    def share_params(
+        self,
+        shared_links: dict[str, Any],
+        model_key_prob_map: dict,
+        data_stat_protect: float = 1e-2,
+        resume: bool = False,
+    ) -> None:
         """
         Share the parameters of classes following rules defined in shared_links during multitask training.
         If not start from checkpoint (resume is False),
@@ -130,8 +136,16 @@ def share_params(self, shared_links: dict[str, Any], resume: bool = False) -> No
                         link_class = self.model[
                             model_key_link
                         ].atomic_model.__getattr__(class_type_link)
+                        frac_prob = (
+                            model_key_prob_map[model_key_link]
+                            / model_key_prob_map[model_key_base]
+                        )
                         link_class.share_params(
-                            base_class, shared_level_link, resume=resume
+                            base_class,
+                            shared_level_link,
+                            model_prob=frac_prob,
+                            protection=data_stat_protect,
+                            resume=resume,
                         )
                         log.warning(
                             f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!"
diff --git a/deepmd/pt/utils/env.py b/deepmd/pt/utils/env.py
index e490e786f0..90d0d536c1 100644
--- a/deepmd/pt/utils/env.py
+++ b/deepmd/pt/utils/env.py
@@ -2,6 +2,7 @@
 import logging
 import multiprocessing
 import os
+import sys
 
 import numpy as np
 import torch
@@ -16,6 +17,17 @@
     set_default_nthreads,
 )
 
+log = logging.getLogger(__name__)
+# DataLoader workers need the "fork" start method; it is not tried on Windows.
+if sys.platform == "win32":
+    log.debug("Skipping fork start method on Windows (not supported).")
+else:
+    try:
+        multiprocessing.set_start_method("fork", force=True)
+        log.debug("Successfully set multiprocessing start method to 'fork'.")
+    except (RuntimeError, ValueError) as err:
+        log.warning(f"Could not set multiprocessing start method: {err}")
+
 SAMPLER_RECORD = os.environ.get("SAMPLER_RECORD", False)
 DP_DTYPE_PROMOTION_STRICT = os.environ.get("DP_DTYPE_PROMOTION_STRICT", "0") == "1"
 try:
@@ -26,7 +38,6 @@
 NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(4, ncpus)))
 if multiprocessing.get_start_method() != "fork":
     # spawn or forkserver does not support NUM_WORKERS > 0 for DataLoader
-    log = logging.getLogger(__name__)
     log.warning(
         "NUM_WORKERS > 0 is not supported with spawn or forkserver start method. "
         "Setting NUM_WORKERS to 0."
diff --git a/deepmd/pt/utils/tabulate.py b/deepmd/pt/utils/tabulate.py
index b155a897da..a308f2d36b 100644
--- a/deepmd/pt/utils/tabulate.py
+++ b/deepmd/pt/utils/tabulate.py
@@ -46,7 +46,8 @@ class DPTabulate(BaseTabulate):
             The excluded pairs of types which have no interaction with each other.
             For example, `[[0, 1]]` means no interaction between type 0 and type 1.
     activation_function
-            The activation function in the embedding net. Supported options are {"tanh","gelu"} in common.ActivationFn.
+            The activation function in the embedding net. See :class:`ActivationFn`
+            for supported options (e.g. "tanh", "gelu", "relu", "silu").
     """
 
     def __init__(
@@ -66,12 +67,7 @@ def __init__(
         )
         self.descrpt_type = self._get_descrpt_type()
 
-        supported_descrpt_type = (
-            "Atten",
-            "A",
-            "T",
-            "R",
-        )
+        supported_descrpt_type = ("Atten", "A", "T", "T_TEBD", "R")
 
         if self.descrpt_type in supported_descrpt_type:
             self.sel_a = self.descrpt.get_sel()
@@ -89,6 +85,7 @@ def __init__(
             "relu6": 4,
             "softplus": 5,
             "sigmoid": 6,
+            "silu": 7,
         }
 
         activation = activation_fn.activation
@@ -156,7 +153,7 @@ def _make_data(self, xx: np.ndarray, idx: int) -> Any:
                         self.matrix["layer_" + str(layer + 1)][idx],
                         xbar,
                         self.functype,
-                    ) + torch.ones((1, 1), dtype=yy.dtype)  # pylint: disable=no-explicit-device
+                    ) + torch.ones((1, 1), dtype=yy.dtype, device=yy.device)
                     dy2 = unaggregated_dy2_dx_s(
                         yy - xx,
                         dy,
@@ -175,7 +172,7 @@ def _make_data(self, xx: np.ndarray, idx: int) -> Any:
                         self.matrix["layer_" + str(layer + 1)][idx],
                         xbar,
                         self.functype,
-                    ) + torch.ones((1, 2), dtype=yy.dtype)  # pylint: disable=no-explicit-device
+                    ) + torch.ones((1, 2), dtype=yy.dtype, device=yy.device)
                     dy2 = unaggregated_dy2_dx_s(
                         yy - tt,
                         dy,
@@ -311,6 +308,8 @@ def _get_descrpt_type(self) -> str:
             return "R"
         elif isinstance(self.descrpt, deepmd.pt.model.descriptor.DescrptSeT):
             return "T"
+        elif isinstance(self.descrpt, deepmd.pt.model.descriptor.DescrptSeTTebd):
+            return "T_TEBD"
         raise RuntimeError(f"Unsupported descriptor {self.descrpt}")
 
     def _get_layer_size(self) -> int:
@@ -325,7 +324,7 @@ def _get_layer_size(self) -> int:
                 * len(self.embedding_net_nodes[0])
                 * len(self.neuron)
             )
-        if self.descrpt_type == "Atten":
+        if self.descrpt_type in ("Atten", "T_TEBD"):
             layer_size = len(self.embedding_net_nodes[0]["layers"])
         elif self.descrpt_type == "A":
             layer_size = len(self.embedding_net_nodes[0]["layers"])
@@ -394,6 +393,13 @@ def _get_network_variable(self, var_name: str) -> dict:
                             "layers"
                         ][layer - 1]["@variables"][var_name]
                         result["layer_" + str(layer)].append(node)
+            elif self.descrpt_type == "T_TEBD":
+                # For the se_e3_tebd descriptor, a single,
+                # shared embedding network is used for all type pairs
+                node = self.embedding_net_nodes[0]["layers"][layer - 1]["@variables"][
+                    var_name
+                ]
+                result["layer_" + str(layer)].append(node)
             elif self.descrpt_type == "R":
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
@@ -464,6 +470,11 @@ def grad(xbar: torch.Tensor, y: torch.Tensor, functype: int) -> torch.Tensor:
     elif functype == 6:
         return y * (1 - y)
 
+    elif functype == 7:
+        # silu'(x) = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
+        sig = torch.sigmoid(xbar)
+        return sig + xbar * sig * (1 - sig)
+
     else:
         raise ValueError(f"Unsupported function type: {functype}")
 
@@ -491,6 +502,12 @@ def grad_grad(xbar: torch.Tensor, y: torch.Tensor, functype: int) -> torch.Tenso
     elif functype == 6:
         return y * (1 - y) * (1 - 2 * y)
 
+    elif functype == 7:
+        sig = torch.sigmoid(xbar)
+        d_sig = sig * (1 - sig)
+        # silu''(x) = 2 * d_sig + x * d_sig * (1 - 2 * sig)
+        return 2 * d_sig + xbar * d_sig * (1 - 2 * sig)
+
     else:
         return -torch.ones_like(xbar)
 
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 308d39b0a3..5878ea473d 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -505,7 +505,7 @@ def descrpt_se_atten_common_args() -> list[Argument]:
     doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
     doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection."
     doc_attn = "The length of hidden vectors in attention layers"
-    doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and tebd_input_mode=='strip'"
+    doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` works for any attn_layer value (for pytorch backend only, for other backends, attn_layer=0 is still needed to compress) when tebd_input_mode=='strip'. When attn_layer!=0, only type embedding is compressed, geometric parts are not compressed."
     doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates"
     doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix"
 
@@ -2993,8 +2993,9 @@ def training_data_args() -> list[
     link_sys = make_link("systems", "training/training_data/systems")
     doc_systems = (
         "The data systems for training. "
-        "This key can be provided with a list that specifies the systems, or be provided with a string "
-        "by which the prefix of all systems are given and the list of the systems is automatically generated."
+        "This key can be a list or a str. "
+        "When provided as a string, it can be a system directory path (containing 'type.raw') or a parent directory path to recursively search for all system subdirectories. "
+        "When provided as a list, each string item in the list is processed the same way as individual string inputs, i.e., each path can be a system directory or a parent directory to recursively search for all system subdirectories."
     )
     doc_patterns = (
         "The customized patterns used in `rglob` to collect all training systems. "
@@ -3074,8 +3075,9 @@ def validation_data_args() -> list[
     link_sys = make_link("systems", "training/validation_data/systems")
     doc_systems = (
         "The data systems for validation. "
-        "This key can be provided with a list that specifies the systems, or be provided with a string "
-        "by which the prefix of all systems are given and the list of the systems is automatically generated."
+        "This key can be a list or a str. "
+        "When provided as a string, it can be a system directory path (containing 'type.raw') or a parent directory path to recursively search for all system subdirectories. "
+        "When provided as a list, each string item in the list is processed the same way as individual string inputs, i.e., each path can be a system directory or a parent directory to recursively search for all system subdirectories."
     )
     doc_patterns = (
         "The customized patterns used in `rglob` to collect all validation systems. "
diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py
index 9b93c64507..26a27c82d7 100644
--- a/deepmd/utils/data.py
+++ b/deepmd/utils/data.py
@@ -2,10 +2,19 @@
 
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import bisect
+import functools
 import logging
+from concurrent.futures import (
+    ThreadPoolExecutor,
+    as_completed,
+)
+from pathlib import (
+    Path,
+)
 from typing import (
     Any,
     Optional,
+    Union,
 )
 
 import numpy as np
@@ -13,9 +22,11 @@
 from deepmd.env import (
     GLOBAL_ENER_FLOAT_PRECISION,
     GLOBAL_NP_FLOAT_PRECISION,
+    LRU_CACHE_SIZE,
 )
 from deepmd.utils import random as dp_random
 from deepmd.utils.path import (
+    DPH5Path,
     DPPath,
 )
 
@@ -68,10 +79,7 @@ def __init__(
             raise FileNotFoundError(f"No {set_prefix}.* is found in {sys_path}")
         self.dirs.sort()
         # check mix_type format
-        error_format_msg = (
-            "if one of the set is of mixed_type format, "
-            "then all of the sets in this system should be of mixed_type format!"
-        )
+        error_format_msg = "if one of the set is of mixed_type format, then all of the sets in this system should be of mixed_type format!"
         self.mixed_type = self._check_mode(self.dirs[0])
         for set_item in self.dirs[1:]:
             assert self._check_mode(set_item) == self.mixed_type, error_format_msg
@@ -96,10 +104,10 @@ def __init__(
                     f"Elements {missing_elements} are not present in the provided `type_map`."
                 )
             if not self.mixed_type:
-                atom_type_ = [
-                    type_map.index(self.type_map[ii]) for ii in self.atom_type
-                ]
-                self.atom_type = np.array(atom_type_, dtype=np.int32)
+                old_to_new_type_idx = np.array(
+                    [type_map.index(name) for name in self.type_map], dtype=np.int32
+                )
+                self.atom_type = old_to_new_type_idx[self.atom_type].astype(np.int32)
             else:
                 self.enforce_type_map = True
                 sorter = np.argsort(type_map)
@@ -128,8 +136,7 @@ def __init__(
         self.shuffle_test = shuffle_test
         # set modifier
         self.modifier = modifier
-        # calculate prefix sum for get_item method
-        frames_list = [self._get_nframes(item) for item in self.dirs]
+        frames_list = [self._get_nframes(set_name) for set_name in self.dirs]
         self.nframes = np.sum(frames_list)
         # The prefix sum stores the range of indices contained in each directory, which is needed by get_item method
         self.prefix_sum = np.cumsum(frames_list).tolist()
@@ -248,27 +255,18 @@ def get_item_torch(self, index: int) -> dict:
         index
             index of the frame
         """
-        i = bisect.bisect_right(self.prefix_sum, index)
-        frames = self._load_set(self.dirs[i])
-        frame = self._get_subdata(frames, index - self.prefix_sum[i])
-        frame = self.reformat_data_torch(frame)
-        frame["fid"] = index
-        return frame
+        return self.get_single_frame(index)
 
     def get_item_paddle(self, index: int) -> dict:
         """Get a single frame data . The frame is picked from the data system by index. The index is coded across all the sets.
+        Behaves the same as the PyTorch backend (both delegate to ``get_single_frame``).
 
         Parameters
         ----------
         index
             index of the frame
         """
-        i = bisect.bisect_right(self.prefix_sum, index)
-        frames = self._load_set(self.dirs[i])
-        frame = self._get_subdata(frames, index - self.prefix_sum[i])
-        frame = self.reformat_data_torch(frame)
-        frame["fid"] = index
-        return frame
+        return self.get_single_frame(index)
 
     def get_batch(self, batch_size: int) -> dict:
         """Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system.
@@ -340,8 +338,10 @@ def get_numb_set(self) -> int:
 
     def get_numb_batch(self, batch_size: int, set_idx: int) -> int:
         """Get the number of batches in a set."""
-        data = self._load_set(self.dirs[set_idx])
-        ret = data["coord"].shape[0] // batch_size
+        set_name = self.dirs[set_idx]
+        # Directly obtain the number of frames to avoid loading the entire dataset
+        nframes = self._get_nframes(set_name)
+        ret = nframes // batch_size
         if ret == 0:
             ret = 1
         return ret
@@ -377,6 +377,103 @@ def get_natoms_vec(self, ntypes: int) -> np.ndarray:
         tmp = np.append(tmp, natoms_vec)
         return tmp.astype(np.int32)
 
+    def get_single_frame(self, index: int) -> dict:
+        """Load one frame by its global index (memmap-backed for on-disk sets) and return it as a dict; raises IndexError if out of range."""
+        if index < 0 or index >= self.nframes:
+            raise IndexError(f"Frame index {index} out of range [0, {self.nframes})")
+        # 1. Find the correct set directory and local frame index
+        set_idx = bisect.bisect_right(self.prefix_sum, index)
+        set_dir = self.dirs[set_idx]
+        if not isinstance(set_dir, DPPath):
+            set_dir = DPPath(set_dir)
+        # Calculate local index within the set.* directory
+        local_idx = index - (0 if set_idx == 0 else self.prefix_sum[set_idx - 1])
+        # Calculate the number of frames in this set to avoid redundant _get_nframes calls
+        set_nframes = (
+            self.prefix_sum[set_idx]
+            if set_idx == 0
+            else self.prefix_sum[set_idx] - self.prefix_sum[set_idx - 1]
+        )
+
+        frame_data = {}
+        # 2. Concurrently load all non-reduced items
+        non_reduced_keys = [k for k, v in self.data_dict.items() if v["reduce"] is None]
+        reduced_keys = [k for k, v in self.data_dict.items() if v["reduce"] is not None]
+        # Use a thread pool to parallelize loading
+        if non_reduced_keys:
+            with ThreadPoolExecutor(max_workers=len(non_reduced_keys)) as executor:
+                future_to_key = {
+                    executor.submit(
+                        self._load_single_data, set_dir, key, local_idx, set_nframes
+                    ): key
+                    for key in non_reduced_keys
+                }
+                for future in as_completed(future_to_key):
+                    key = future_to_key[future]
+                    frame_data["find_" + key], frame_data[key] = future.result()
+
+        # 3. Compute reduced items from already loaded data
+        for key in reduced_keys:
+            vv = self.data_dict[key]
+            k_in = vv["reduce"]
+            ndof = vv["ndof"]
+            frame_data["find_" + key] = frame_data["find_" + k_in]
+            # Reshape to (natoms, ndof) and sum over atom axis
+            tmp_in = (
+                frame_data[k_in].reshape(-1, ndof).astype(GLOBAL_ENER_FLOAT_PRECISION)
+            )
+            frame_data[key] = np.sum(tmp_in, axis=0)
+
+        # 4. Handle atom types (mixed or standard)
+        if self.mixed_type:
+            type_path = set_dir / "real_atom_types.npy"
+            # For HDF5 files, use load_numpy; for filesystem, use memmap
+            if isinstance(type_path, DPH5Path):
+                mmap_types = type_path.load_numpy()
+            else:
+                mmap_types = self._get_memmap(type_path)
+            real_type = mmap_types[local_idx].copy().astype(np.int32)
+
+            if self.enforce_type_map:
+                try:
+                    real_type = self.type_idx_map[real_type].astype(np.int32)
+                except IndexError as e:
+                    raise IndexError(
+                        f"some types in 'real_atom_types.npy' of set {set_dir} are not contained in {self.get_ntypes()} types!"
+                    ) from e
+
+            frame_data["type"] = real_type
+            ntypes = self.get_ntypes()
+            natoms = len(real_type)
+            # Use bincount for efficient counting of each type
+            natoms_vec = np.bincount(
+                real_type[real_type >= 0], minlength=ntypes
+            ).astype(np.int32)
+            frame_data["real_natoms_vec"] = np.concatenate(
+                (np.array([natoms, natoms], dtype=np.int32), natoms_vec)
+            )
+        else:
+            frame_data["type"] = self.atom_type[self.idx_map]
+
+        # 5. Standardize keys
+        frame_data = {kk.replace("atomic", "atom"): vv for kk, vv in frame_data.items()}
+
+        # 6. Flatten non-atomic items to 1-D; atomic items are left in the layout they were loaded in
+        for kk in self.data_dict.keys():
+            if (
+                "find_" not in kk
+                and kk in frame_data
+                and not self.data_dict[kk]["atomic"]
+            ):
+                frame_data[kk] = frame_data[kk].reshape(-1)
+        frame_data["atype"] = frame_data["type"]
+
+        if not self.pbc:
+            frame_data["box"] = None
+
+        frame_data["fid"] = index
+        return frame_data
+
     def avg(self, key: str) -> float:
         """Return the average value of an item."""
         if key not in self.data_dict.keys():
@@ -395,11 +492,9 @@ def avg(self, key: str) -> float:
             return np.average(eners, axis=0)
 
     def _idx_map_sel(self, atom_type: np.ndarray, type_sel: list[int]) -> np.ndarray:
-        new_types = []
-        for ii in atom_type:
-            if ii in type_sel:
-                new_types.append(ii)
-        new_types = np.array(new_types, dtype=int)
+        # Use vectorized operations instead of Python loop
+        sel_mask = np.isin(atom_type, type_sel)
+        new_types = atom_type[sel_mask]
         natoms = new_types.shape[0]
         idx = np.arange(natoms, dtype=np.int64)
         idx_map = np.lexsort((idx, new_types))
@@ -413,6 +508,15 @@ def _get_natoms_2(self, ntypes: int) -> tuple[int, np.ndarray]:
             natoms_vec[ii] = np.count_nonzero(sample_type == ii)
         return natoms, natoms_vec
 
+    def _get_memmap(self, path: DPPath) -> np.memmap:
+        """Return a cached read-only memory map for the given npy file.
+        The absolute path and the file's modification time form the cache key,
+        so a file modified on disk gets a fresh mapping instead of a stale cached one.
+        """
+        abs_path = Path(str(path)).absolute()
+        file_mtime = abs_path.stat().st_mtime
+        return self._create_memmap(str(abs_path), str(file_mtime))
+
     def _get_subdata(
         self, data: dict[str, Any], idx: Optional[np.ndarray] = None
     ) -> dict[str, Any]:
@@ -476,18 +580,27 @@ def _shuffle_data(self, data: dict[str, Any]) -> dict[str, Any]:
                 ret[kk] = data[kk]
         return ret, idx
 
-    def _get_nframes(self, set_name: DPPath) -> int:
-        # get nframes
+    def _get_nframes(self, set_name: Union[DPPath, str]) -> int:
         if not isinstance(set_name, DPPath):
             set_name = DPPath(set_name)
         path = set_name / "coord.npy"
-        if self.data_dict["coord"]["high_prec"]:
-            coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
+        if isinstance(set_name, DPH5Path):
+            shape = path.root[path._name].shape
         else:
-            coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
-        if coord.ndim == 1:
-            coord = coord.reshape([1, -1])
-        nframes = coord.shape[0]
+            # Read only the npy header to get the shape; the array data is never loaded
+            with open(str(path), "rb") as f:
+                version = np.lib.format.read_magic(f)
+                if version[0] == 1:
+                    shape, _fortran_order, _dtype = np.lib.format.read_array_header_1_0(
+                        f
+                    )
+                elif version[0] in [2, 3]:
+                    shape, _fortran_order, _dtype = np.lib.format.read_array_header_2_0(
+                        f
+                    )
+                else:
+                    raise ValueError(f"Unsupported .npy file version: {version}")
+        nframes = shape[0] if len(shape) > 1 else 1  # a 1-D coord array is one frame (HDF5 and npy alike)
         return nframes
 
     def reformat_data_torch(self, data: dict[str, Any]) -> dict[str, Any]:
@@ -614,9 +727,9 @@ def _load_data(
             idx_map = self.idx_map
             # if type_sel, then revise natoms and idx_map
             if type_sel is not None:
-                natoms_sel = 0
-                for jj in type_sel:
-                    natoms_sel += np.sum(self.atom_type == jj)
+                # Use vectorized operations for better performance
+                sel_mask = np.isin(self.atom_type, type_sel)
+                natoms_sel = np.sum(sel_mask)
                 idx_map_sel = self._idx_map_sel(self.atom_type, type_sel)
             else:
                 natoms_sel = natoms
@@ -644,7 +757,6 @@ def _load_data(
                                 tmp = np.zeros(
                                     [nframes, natoms, ndof_], dtype=data.dtype
                                 )
-                                sel_mask = np.isin(self.atom_type, type_sel)
                                 tmp[:, sel_mask] = data.reshape(
                                     [nframes, natoms_sel, ndof_]
                                 )
@@ -657,7 +769,6 @@ def _load_data(
                             if output_natoms_for_type_sel:
                                 pass
                             else:
-                                sel_mask = np.isin(self.atom_type, type_sel)
                                 data = data.reshape([nframes, natoms, ndof_])
                                 data = data[:, sel_mask]
                                 natoms = natoms_sel
@@ -690,7 +801,7 @@ def _load_data(
                         data = data.reshape([nframes, -1])
                 data = np.reshape(data, [nframes, ndof])
             except ValueError as err_message:
-                explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
+                explanation = "This error may occur when your label mismatch its name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
                 log.error(str(err_message))
                 log.error(explanation)
                 raise ValueError(str(err_message) + ". " + explanation) from err_message
@@ -707,6 +818,147 @@ def _load_data(
                 data = np.repeat(data, repeat).reshape([nframes, -1])
             return np.float32(0.0), data
 
+    def _load_single_data(
+        self, set_dir: DPPath, key: str, frame_idx: int, set_nframes: int
+    ) -> tuple[np.float32, np.ndarray]:
+        """
+        Load and process a SINGLE frame of a SINGLE key, mirroring the logic of `_load_data`.
+        Returns ``(find, data)``: ``find`` is 1.0 if the file exists, else 0.0 with default-filled data.
+
+        Parameters
+        ----------
+        set_dir : DPPath
+            The directory path of the set
+        key : str
+            The key name of the data to load
+        frame_idx : int
+            The local frame index within the set
+        set_nframes : int
+            The total number of frames in this set (to avoid redundant _get_nframes calls)
+        """
+        vv = self.data_dict[key]
+        path = set_dir / (key + ".npy")
+
+        if vv["atomic"]:
+            natoms = self.natoms
+            idx_map = self.idx_map
+            # if type_sel, then revise natoms and idx_map
+            if vv["type_sel"] is not None:
+                # Use vectorized operations for better performance
+                sel_mask = np.isin(self.atom_type, vv["type_sel"])
+                natoms_sel = np.sum(sel_mask)
+                idx_map_sel = self._idx_map_sel(self.atom_type, vv["type_sel"])
+            else:
+                natoms_sel = natoms
+                idx_map_sel = idx_map
+        else:
+            natoms = 1
+            natoms_sel = 0
+            idx_map_sel = None
+        ndof = vv["ndof"]
+
+        # Determine target data type from requirements
+        dtype = vv.get("dtype")
+        if dtype is None:
+            dtype = (
+                GLOBAL_ENER_FLOAT_PRECISION
+                if vv.get("high_prec")
+                else GLOBAL_NP_FLOAT_PRECISION
+            )
+
+        # Branch 1: File does not exist
+        if not path.is_file():
+            if vv.get("must"):
+                raise RuntimeError(f"{path} not found!")
+
+            # Create a default array based on requirements
+            if vv["atomic"]:
+                if vv["type_sel"] is not None and not vv["output_natoms_for_type_sel"]:
+                    natoms = natoms_sel
+                data = np.full([natoms, ndof], vv["default"], dtype=dtype)
+            else:
+                # For non-atomic data, shape should be [ndof]
+                data = np.full([ndof], vv["default"], dtype=dtype)
+            return np.float32(0.0), data
+
+        # Branch 2: Data loading
+        if isinstance(path, DPH5Path):
+            # For HDF5 files, use load_numpy which handles HDF5 datasets
+            mmap_obj = path.load_numpy().astype(dtype)
+        else:
+            # For filesystem paths, use memmap for better performance
+            mmap_obj = self._get_memmap(path)
+
+        # corner case: a lone frame stored without its leading frame axis (0-D/1-D array);
+        if set_nframes == 1 and mmap_obj.ndim < 2:  # a (1, width) array already has the axis
+            mmap_obj = mmap_obj[None, ...]
+        # Slice the single frame and make an in-memory copy for modification
+        data = mmap_obj[frame_idx].copy().astype(dtype, copy=False)
+
+        try:
+            if vv["atomic"]:
+                # Handle type_sel logic
+                if vv["type_sel"] is not None:
+                    if mmap_obj.shape[1] == natoms_sel * ndof:
+                        if vv["output_natoms_for_type_sel"]:
+                            tmp = np.zeros([natoms, ndof], dtype=data.dtype)
+                            # sel_mask needs to be applied to the original atom layout
+                            tmp[sel_mask] = data.reshape([natoms_sel, ndof])
+                            data = tmp
+                        else:  # output is natoms_sel
+                            natoms = natoms_sel
+                            idx_map = idx_map_sel
+                    elif mmap_obj.shape[1] == natoms * ndof:
+                        data = data.reshape([natoms, ndof])
+                        if vv["output_natoms_for_type_sel"]:
+                            pass
+                        else:
+                            data = data[sel_mask]
+                            idx_map = idx_map_sel
+                            natoms = natoms_sel
+                    else:  # Shape mismatch error
+                        raise ValueError(
+                            f"The shape of the data {key} in {set_dir} has width {mmap_obj.shape[1]}, which doesn't match either ({natoms_sel * ndof}) or ({natoms * ndof})"
+                        )
+
+                # Handle special case for Hessian
+                if key == "hessian":
+                    data = data.reshape(3 * natoms, 3 * natoms)
+                    num_chunks, chunk_size = len(idx_map), 3
+                    idx_map_hess = np.arange(
+                        num_chunks * chunk_size, dtype=int
+                    ).reshape(num_chunks, chunk_size)
+                    idx_map_hess = idx_map_hess[idx_map].flatten()
+                    data = data[idx_map_hess, :]
+                    data = data[:, idx_map_hess]
+                    data = data.reshape(-1)
+                    # size of hessian is 3Natoms * 3Natoms
+                    # ndof = 3 * ndof * 3 * ndof
+                else:
+                    # data should be 2D here: [natoms, ndof]
+                    data = data.reshape([natoms, -1])
+                    data = data[idx_map, :]
+            else:
+                data = data.reshape([ndof])
+
+            # Atomic: return [natoms, ndof] or flattened hessian above
+            # Non-atomic: return [ndof]
+            return np.float32(1.0), data
+
+        except ValueError as err_message:
+            explanation = (
+                "This error may occur when your label mismatches its name, "
+                "e.g., global tensor stored in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
+            )
+            log.exception(
+                "Single-frame load failed for key=%s, set=%s, frame=%d. %s",
+                key,
+                set_dir,
+                frame_idx,
+                explanation,
+            )
+            raise ValueError(f"{err_message}. {explanation}") from err_message
+
     def _load_type(self, sys_path: DPPath) -> np.ndarray:
         atom_type = (sys_path / "type.raw").load_txt(ndmin=1).astype(np.int32)
         return atom_type
@@ -741,6 +993,34 @@ def _check_pbc(self, sys_path: DPPath) -> bool:
     def _check_mode(self, set_path: DPPath) -> bool:
         return (set_path / "real_atom_types.npy").is_file()
 
+    @staticmethod
+    @functools.lru_cache(maxsize=LRU_CACHE_SIZE)
+    def _create_memmap(path_str: str, mtime_str: str) -> np.memmap:
+        """Create a read-only memmap for an npy file; results are cached by lru_cache.
+        The cache size (LRU_CACHE_SIZE) limits the number of open file handles.
+
+        Parameters
+        ----------
+        path_str
+            The file path as a string.
+        mtime_str
+            The modification time as a string; not read in the body, it only serves as part of the cache key.
+        """
+        with open(path_str, "rb") as f:
+            version = np.lib.format.read_magic(f)
+            if version[0] == 1:
+                shape, fortran_order, dtype = np.lib.format.read_array_header_1_0(f)
+            elif version[0] in [2, 3]:
+                shape, fortran_order, dtype = np.lib.format.read_array_header_2_0(f)
+            else:
+                raise ValueError(f"Unsupported .npy file version: {version}")
+            offset = f.tell()
+        order = "F" if fortran_order else "C"
+        # Create a read-only memmap
+        return np.memmap(
+            path_str, dtype=dtype, mode="r", shape=shape, order=order, offset=offset
+        )
+
 
 class DataRequirementItem:
     """A class to store the data requirement for data systems.
diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
index cf6e81aad1..4f22b3c380 100644
--- a/deepmd/utils/data_system.py
+++ b/deepmd/utils/data_system.py
@@ -790,6 +790,7 @@ def process_systems(
     """Process the user-input systems.
 
     If it is a single directory, search for all the systems in the directory.
+    If it is a list, each item in the list is treated as a directory to search.
     Check if the systems are valid.
 
     Parameters
@@ -801,17 +802,31 @@ def process_systems(
 
     Returns
     -------
-    list of str
+    result_systems: list of str
         The valid systems
     """
+    # Normalize input to a list of paths to search
     if isinstance(systems, str):
+        search_paths = [systems]
+    elif isinstance(systems, list):
+        search_paths = systems
+    else:
+        # Handle unsupported input types
+        raise ValueError(
+            f"Invalid systems type: {type(systems)}. Must be str or list[str]."
+        )
+
+    # Iterate over the search_paths list and apply expansion logic to each path
+    result_systems = []
+    for path in search_paths:
         if patterns is None:
-            systems = expand_sys_str(systems)
+            expanded_paths = expand_sys_str(path)
         else:
-            systems = rglob_sys_str(systems, patterns)
-    elif isinstance(systems, list):
-        systems = systems.copy()
-    return systems
+            expanded_paths = rglob_sys_str(path, patterns)
+
+        result_systems.extend(expanded_paths)
+
+    return result_systems
 
 
 def get_data(
diff --git a/deepmd/utils/env_mat_stat.py b/deepmd/utils/env_mat_stat.py
index ecc0b7b62f..a1f2ba6966 100644
--- a/deepmd/utils/env_mat_stat.py
+++ b/deepmd/utils/env_mat_stat.py
@@ -28,7 +28,7 @@ class StatItem:
 
     Parameters
     ----------
-    number : int
+    number : float
         The total size of given array.
     sum : float
         The sum value of the matrix.
@@ -36,7 +36,9 @@ class StatItem:
         The sum squared value of the matrix.
     """
 
-    def __init__(self, number: int = 0, sum: float = 0, squared_sum: float = 0) -> None:
+    def __init__(
+        self, number: float = 0, sum: float = 0, squared_sum: float = 0
+    ) -> None:
         self.number = number
         self.sum = sum
         self.squared_sum = squared_sum
@@ -48,6 +50,13 @@ def __add__(self, other: "StatItem") -> "StatItem":
             squared_sum=self.squared_sum + other.squared_sum,
         )
 
+    def __mul__(self, scalar: float) -> "StatItem":
+        return StatItem(
+            number=self.number * scalar,
+            sum=self.sum * scalar,
+            squared_sum=self.squared_sum * scalar,
+        )
+
     def compute_avg(self, default: float = 0) -> float:
         """Compute the average of the environment matrix.
 
diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py
index d15794a9e5..4daae5d471 100644
--- a/deepmd/utils/tabulate.py
+++ b/deepmd/utils/tabulate.py
@@ -197,6 +197,48 @@ def build(
                         nspline[ii][0] if self.is_pt else nspline[ii],
                     )
                     idx += 1
+        elif self.descrpt_type == "T_TEBD":
+            # 1. Find the global range [ll, uu] of cos(theta) across all types
+            uu = np.max(upper)
+            ll = np.min(lower)
+
+            # 2. Create a unique input grid xx for this shared geometric network based on the global range
+            xx = np.arange(extrapolate * ll, ll, stride1, dtype=self.data_type)
+            xx = np.append(
+                xx,
+                np.arange(ll, uu, stride0, dtype=self.data_type),
+            )
+            xx = np.append(
+                xx,
+                np.arange(
+                    uu,
+                    extrapolate * uu,
+                    stride1,
+                    dtype=self.data_type,
+                ),
+            )
+            xx = np.append(xx, np.array([extrapolate * uu], dtype=self.data_type))
+
+            # 3. Calculate the number of spline points
+            nspline = (
+                (uu - ll) / stride0
+                + ((extrapolate * uu - uu) / stride1)
+                + ((ll - extrapolate * ll) / stride1)
+            ).astype(int)
+
+            # 4. Call _build_lower only once to generate the table for this shared network
+            geometric_net_name = "filter_net"
+            self._build_lower(
+                geometric_net_name,
+                xx,
+                0,
+                uu,
+                ll,
+                stride0,
+                stride1,
+                extrapolate,
+                nspline,
+            )
         elif self.descrpt_type == "R":
             for ii in range(self.table_size):
                 if (self.type_one_side and not self._all_excluded(ii)) or (
@@ -242,9 +284,10 @@ def build(
             self._convert_numpy_float_to_int()
         return self.lower, self.upper
 
+    # generate_spline_table
     def _build_lower(
         self,
-        net: int,
+        net: str,
         xx: np.ndarray,
         idx: int,
         upper: float,
@@ -260,21 +303,14 @@ def _build_lower(
         )
 
         # tt.shape: [nspline, self.last_layer_size]
-        if self.descrpt_type in ("Atten", "A", "AEbdV2"):
+        if self.descrpt_type in ("Atten", "A", "AEbdV2", "R"):
             tt = np.full((nspline, self.last_layer_size), stride1)  # pylint: disable=no-explicit-dtype
             tt[: int((upper - lower) / stride0), :] = stride0
-        elif self.descrpt_type == "T":
+        elif self.descrpt_type in ("T", "T_TEBD"):
             tt = np.full((nspline, self.last_layer_size), stride1)  # pylint: disable=no-explicit-dtype
-            tt[
-                int((lower - extrapolate * lower) / stride1) + 1 : (
-                    int((lower - extrapolate * lower) / stride1)
-                    + int((upper - lower) / stride0)
-                ),
-                :,
-            ] = stride0
-        elif self.descrpt_type == "R":
-            tt = np.full((nspline, self.last_layer_size), stride1)  # pylint: disable=no-explicit-dtype
-            tt[: int((upper - lower) / stride0), :] = stride0
+            start_index = int((lower - extrapolate * lower) / stride1) + 1
+            end_index = start_index + int((upper - lower) / stride0)
+            tt[start_index:end_index, :] = stride0
         else:
             raise RuntimeError("Unsupported descriptor")
 
@@ -394,7 +430,7 @@ def _get_layer_size(self) -> int:
 
     def _get_table_size(self) -> int:
         table_size = 0
-        if self.descrpt_type in ("Atten", "AEbdV2"):
+        if self.descrpt_type in ("Atten", "AEbdV2", "T_TEBD"):
             table_size = 1
         elif self.descrpt_type == "A":
             table_size = self.ntypes * self.ntypes
@@ -448,7 +484,7 @@ def _get_env_mat_range(self, min_nbor_dist: float) -> tuple[np.ndarray, np.ndarr
         if self.descrpt_type in ("Atten", "A", "AEbdV2"):
             lower = -self.davg[:, 0] / self.dstd[:, 0]
             upper = ((1 / min_nbor_dist) * sw - self.davg[:, 0]) / self.dstd[:, 0]
-        elif self.descrpt_type == "T":
+        elif self.descrpt_type in ("T", "T_TEBD"):
             var = np.square(sw / (min_nbor_dist * self.dstd[:, 1:4]))
             lower = np.min(-var, axis=1)
             upper = np.max(var, axis=1)
diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md
index a21b8913db..7f38ae77ac 100644
--- a/doc/install/install-from-source.md
+++ b/doc/install/install-from-source.md
@@ -273,6 +273,12 @@ It will print the help information like
 
 ### Install horovod and mpi4py {{ tensorflow_icon }}
 
+:::{warning}
+Horovod has not released a new version for a long time.
+As of December 2025, the latest Horovod release does not support the latest TensorFlow versions.
+You can check the patches required to support the latest TensorFlow at [conda-forge/horovod-feedstock](https://github.com/conda-forge/horovod-feedstock/blob/main/recipe/meta.yaml).
+:::
+
 [Horovod](https://github.com/horovod/horovod) and [mpi4py](https://github.com/mpi4py/mpi4py) are used for parallel training. For better performance on GPU, please follow the tuning steps in [Horovod on GPU](https://github.com/horovod/horovod/blob/master/docs/gpus.rst).
 
 ```bash
@@ -377,7 +383,7 @@ mkdir build
 cd build
 ```
 
-The installation requires CMake 3.16 or later for the CPU version, CMake 3.23 or later for the CUDA support, and CMake 3.21 or later for the ROCM support. One can install CMake via `pip` if it is not installed or the installed version does not satisfy the requirement:
+The installation requires CMake 3.25.2 or later for all platforms (CPU, CUDA, and ROCM). One can install CMake via `pip` if it is not installed or the installed version does not satisfy the requirement:
 
 ```sh
 pip install -U cmake
diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md
index b2a88db240..cb65188002 100644
--- a/doc/install/install-lammps.md
+++ b/doc/install/install-lammps.md
@@ -17,11 +17,11 @@ DeePMD-kit will generate a module called `USER-DEEPMD` in the `build` directory,
 
 ```bash
 cd /some/workspace
-wget https://github.com/lammps/lammps/archive/stable_22Jul2025_update1.tar.gz
-tar xf stable_22Jul2025_update1.tar.gz
+wget https://github.com/lammps/lammps/archive/stable_22Jul2025_update2.tar.gz
+tar xf stable_22Jul2025_update2.tar.gz
 ```
 
-The source code of LAMMPS is stored in the directory `lammps-stable_22Jul2025_update1`.
+The source code of LAMMPS is stored in the directory `lammps-stable_22Jul2025_update2`.
 
 Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either make or CMake.
 
@@ -30,7 +30,7 @@ Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either mak
 Now go into the LAMMPS code and copy the DeePMD-kit module like this
 
 ```bash
-cd lammps-stable_22Jul2025_update1/src/
+cd lammps-stable_22Jul2025_update2/src/
 cp -r $deepmd_source_dir/source/build/USER-DEEPMD .
 make yes-kspace
 make yes-extra-fix
@@ -60,8 +60,8 @@ make no-user-deepmd
 Now go into the LAMMPS directory and create a directory called `build`:
 
 ```bash
-mkdir -p lammps-stable_22Jul2025_update1/build/
-cd lammps-stable_22Jul2025_update1/build/
+mkdir -p lammps-stable_22Jul2025_update2/build/
+cd lammps-stable_22Jul2025_update2/build/
 ```
 
 Patch the LAMMPS `CMakeLists.txt` file:
@@ -94,15 +94,15 @@ Now download the LAMMPS code (`8Apr2021` or later), and uncompress it:
 
 ```bash
 cd /some/workspace
-wget https://github.com/lammps/lammps/archive/stable_22Jul2025_update1.tar.gz
-tar xf stable_22Jul2025_update1.tar.gz
+wget https://github.com/lammps/lammps/archive/stable_22Jul2025_update2.tar.gz
+tar xf stable_22Jul2025_update2.tar.gz
 ```
 
-The source code of LAMMPS is stored in the directory `lammps-stable_22Jul2025_update1`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build`
+The source code of LAMMPS is stored in the directory `lammps-stable_22Jul2025_update2`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build`
 
 ```bash
-mkdir -p lammps-stable_22Jul2025_update1/build/
-cd lammps-stable_22Jul2025_update1/build/
+mkdir -p lammps-stable_22Jul2025_update2/build/
+cd lammps-stable_22Jul2025_update2/build/
 ```
 
 Now build LAMMPS. Note that `PLUGIN` must be enabled, and `BUILD_SHARED_LIBS` must be set to `yes`. You can install any other package you want.
diff --git a/doc/model/dpa2.md b/doc/model/dpa2.md
index c8e60c514a..466a4de4f2 100644
--- a/doc/model/dpa2.md
+++ b/doc/model/dpa2.md
@@ -38,6 +38,10 @@ Type embedding is within this descriptor with the {ref}`tebd_dim <model[standard
 
 ## Model compression
 
-Model compression is supported when {ref}`repinit/tebd_input_mode <model[standard]/descriptor[dpa2]/repinit/tebd_input_mode>` is `strip`, but only the `repinit` part is compressed.
+Model compression is supported when {ref}`repinit/tebd_input_mode <model[standard]/descriptor[dpa2]/repinit/tebd_input_mode>` is `strip`.
+
+- If {ref}`repinit/attn_layer <model[standard]/descriptor[dpa2]/repinit/attn_layer>` is `0`, both the type embedding and geometric parts inside `repinit` are compressed.
+- If `repinit/attn_layer` is not `0`, only the type embedding tables are compressed and the geometric attention layers remain as neural networks.
+
 An example is given in `examples/water/dpa2/input_torch_compressible.json`.
 The performance improvement will be limited if other parts are more expensive.
diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md
index 6c0ca0817c..2e0c236cf6 100644
--- a/doc/model/train-se-atten.md
+++ b/doc/model/train-se-atten.md
@@ -134,7 +134,9 @@ You can use descriptor `"se_atten_v2"` and is not allowed to set `tebd_input_mod
 
 Practical evidence demonstrates that `"se_atten_v2"` offers better and more stable performance compared to `"se_atten"`.
 
-Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model[standard]/descriptor[se_atten_v2]/attn_layer>` set to 0.
+:::{note}
+Model compression support differs across backends. See [Model compression](#model-compression) for backend-specific requirements.
+:::
 
 ## Type embedding
 
@@ -182,7 +184,13 @@ DPA-1 supports both the [standard data format](../data/system.md) and the [mixed
 
 ## Model compression
 
-Model compression is supported only when there is no attention layer (`attn_layer` is 0) and `tebd_input_mode` is `strip`.
+### TensorFlow {{ tensorflow_icon }}
+
+Model compression is supported only when the descriptor attention depth {ref}`attn_layer <model[standard]/descriptor[se_atten]/attn_layer>` is 0 and {ref}`tebd_input_mode <model[standard]/descriptor[se_atten]/tebd_input_mode>` is `"strip"`. Descriptors with `attn_layer` greater than 0 cannot be compressed in the TensorFlow implementation because the geometric part is tabulated from the static computation graph.
+
+### PyTorch {{ pytorch_icon }}
+
+Model compression is supported for any {ref}`attn_layer <model[standard]/descriptor[se_atten_v2]/attn_layer>` value when {ref}`tebd_input_mode <model[standard]/descriptor[se_atten_v2]/tebd_input_mode>` is `"strip"`. When `attn_layer` is 0, both the type embedding and geometric parts are compressed. When `attn_layer` is not 0, only the type embedding is compressed while the geometric part keeps the neural network implementation (a warning is emitted during compression).
 
 ## Training example
 
diff --git a/doc/model/train-se-e3-tebd.md b/doc/model/train-se-e3-tebd.md
index 3e0b6f9c5a..f0001f4e67 100644
--- a/doc/model/train-se-e3-tebd.md
+++ b/doc/model/train-se-e3-tebd.md
@@ -83,4 +83,4 @@ Type embedding is within this descriptor with the {ref}`tebd_dim <model[standard
 
 ## Model compression
 
-Model compression is not supported.
+Model compression is supported only when `tebd_input_mode` is `strip`.
diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md
index 174c39d6d9..af4b4b31d9 100644
--- a/doc/train/training-advanced.md
+++ b/doc/train/training-advanced.md
@@ -76,8 +76,8 @@ Other training parameters are given in the {ref}`training <training>` section.
 The sections {ref}`training_data <training/training_data>` and {ref}`validation_data <training/validation_data>` give the training dataset and validation dataset, respectively. Taking the training dataset for example, the keys are explained below:
 
 - {ref}`systems <training/training_data/systems>` provide paths of the training data systems. DeePMD-kit allows you to provide multiple systems with different numbers of atoms. This key can be a `list` or a `str`.
-  - `list`: {ref}`systems <training/training_data/systems>` gives the training data systems.
-  - `str`: {ref}`systems <training/training_data/systems>` should be a valid path. DeePMD-kit will recursively search all data systems in this path.
+  - `str`: {ref}`systems <training/training_data/systems>` should be a valid path. It can be either a system directory (one containing `type.raw`) or a parent directory, which is searched recursively for all system subdirectories.
+  - `list`: {ref}`systems <training/training_data/systems>` gives a list of paths. Each item in the list is processed in the same way as a single `str` input, i.e., each path can be either a system directory or a parent directory that is searched recursively for all system subdirectories.
 - At each training step, DeePMD-kit randomly picks {ref}`batch_size <training/training_data/batch_size>` frame(s) from one of the systems. The probability of using a system is by default in proportion to the number of batches in the system. More options are available for automatically determining the probability of using systems. One can set the key {ref}`auto_prob <training/training_data/auto_prob>` to
   - `"prob_uniform"` all systems are used with the same probability.
   - `"prob_sys_size"` the probability of using a system is proportional to its size (number of frames).
diff --git a/examples/infer_water/CMakeLists.txt b/examples/infer_water/CMakeLists.txt
index 27b7541c2d..c82816c912 100644
--- a/examples/infer_water/CMakeLists.txt
+++ b/examples/infer_water/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.7)
+cmake_minimum_required(VERSION 3.25.2)
 project(infer_water)
 
 # find DeePMD-kit
diff --git a/pyproject.toml b/pyproject.toml
index cb11d0258d..4e55fdd96a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,9 +2,10 @@
 requires = [
     # TODO: unpin the upper bound when scikit-build dynamic metadata API is stable
     # dynamic metadata API is still unstable
-    "scikit-build-core>=0.5,<0.11,!=0.6.0",
+    "scikit-build-core>=0.5,!=0.6.0,<0.12",
     "packaging",
     'tomli >= 1.1.0 ; python_version < "3.11"',
+    "dependency_groups",
 ]
 build-backend = "backend.dp_backend"
 backend-path = ["."]
@@ -108,7 +109,7 @@ docs = [
     "sphinx-remove-toctrees",
 ]
 lmp = [
-    "lammps[mpi]~=2025.7.22.1.0",
+    "lammps[mpi]~=2025.7.22.2.0",
 ]
 ipi = [
     "ipi",
@@ -158,6 +159,28 @@ dev = [
   "cmake",
   "mpich",
 ]
+pin_tensorflow_cpu = [
+  # https://github.com/tensorflow/tensorflow/issues/75279
+  # macos x86 has been deprecated
+  "tensorflow-cpu>=2.18,<2.21; platform_machine=='x86_64' and platform_system == 'Linux'",
+  "tensorflow~=2.18.0; (platform_machine!='x86_64' or platform_system != 'Linux') and (platform_machine!='x86_64' or platform_system != 'Darwin')",
+  "tensorflow; platform_machine=='x86_64' and platform_system == 'Darwin'",
+]
+pin_tensorflow_gpu = [
+  "tensorflow~=2.18.0",
+]
+pin_pytorch_cpu = [
+  # https://github.com/pytorch/pytorch/issues/114602
+  # macos x86 has been deprecated
+  "torch~=2.8.0; platform_machine!='x86_64' or platform_system != 'Darwin'",
+  "torch; platform_machine=='x86_64' and platform_system == 'Darwin'",
+]
+pin_pytorch_gpu = [
+  "torch>=2.7,<2.9",
+]
+pin_jax = [
+  "jax==0.5.0;python_version>='3.10'",
+]
 
 [tool.setuptools_scm]
 
@@ -242,7 +265,7 @@ repair-wheel-command = """delocate-wheel --require-archs {delocate_archs} -w {de
 
 [tool.cibuildwheel.macos.environment]
 PIP_PREFER_BINARY = "1"
-DP_LAMMPS_VERSION = "stable_22Jul2025_update1"
+DP_LAMMPS_VERSION = "stable_22Jul2025_update2"
 DP_ENABLE_IPI = "1"
 DP_ENABLE_PYTORCH = "1"
 DP_ENABLE_PADDLE = "1"
@@ -278,14 +301,13 @@ before-build = [
 ]
 [tool.cibuildwheel.linux.environment]
 PIP_PREFER_BINARY = "1"
-DP_LAMMPS_VERSION = "stable_22Jul2025_update1"
+DP_LAMMPS_VERSION = "stable_22Jul2025_update2"
 DP_ENABLE_IPI = "1"
 DP_ENABLE_PYTORCH = "1"
 DP_ENABLE_PADDLE = "1"
 # use CPU version of torch for building, which should also work for GPU
-# note: uv has different behavior from pip on extra index url
-# https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#packages-that-exist-on-multiple-indexes
-UV_EXTRA_INDEX_URL = "https://download.pytorch.org/whl/cpu"
+# https://docs.astral.sh/uv/guides/integration/pytorch/#automatic-backend-selection
+UV_TORCH_BACKEND = "cpu"
 
 [tool.cibuildwheel.windows]
 test-extras = ["cpu", "torch", "paddle"]
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 2c13a5a367..d356d4cba6 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -1,5 +1,5 @@
-# new in 3.16: GET_RUNTIME_DEPENDENCIES, target_precompile_headers
-cmake_minimum_required(VERSION 3.16)
+# new in 3.25: RUN_OUTPUT_STDOUT_VARIABLE
+cmake_minimum_required(VERSION 3.25.2)
 project(DeePMD)
 
 option(ENABLE_TENSORFLOW "Enable TensorFlow interface" OFF)
@@ -244,8 +244,14 @@ set(DP_VARIANT "cpu")
 
 # define USE_CUDA_TOOLKIT
 if(USE_CUDA_TOOLKIT)
-  cmake_minimum_required(VERSION 3.23)
+  cmake_minimum_required(VERSION 3.25.2)
   find_package(CUDAToolkit REQUIRED)
+  # CUDA 13.0+ requires C++17
+  if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "13.0")
+    set_if_higher(CMAKE_CXX_STANDARD 17)
+    message(
+      STATUS "CUDA ${CUDAToolkit_VERSION} detected, setting C++ standard to 17")
+  endif()
   if(NOT DEFINED CMAKE_CUDA_COMPILER)
     set(CMAKE_CUDA_COMPILER ${CUDAToolkit_NVCC_EXECUTABLE})
   endif()
@@ -261,7 +267,7 @@ endif(USE_CUDA_TOOLKIT)
 
 # define USE_ROCM_TOOLKIT
 if(USE_ROCM_TOOLKIT)
-  cmake_minimum_required(VERSION 3.21)
+  cmake_minimum_required(VERSION 3.25.2)
   include(CMakeDetermineHIPCompiler)
   list(APPEND CMAKE_PREFIX_PATH ${CMAKE_HIP_COMPILER_ROCM_ROOT})
   find_package(hip REQUIRED)
diff --git a/source/api_c/tests/CMakeLists.txt b/source/api_c/tests/CMakeLists.txt
index c42055ba6f..ef19305004 100644
--- a/source/api_c/tests/CMakeLists.txt
+++ b/source/api_c/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9)
+cmake_minimum_required(VERSION 3.25.2)
 project(deepmd_api_c_test)
 
 file(GLOB TEST_SRC test_*.cc)
@@ -11,6 +11,19 @@ target_link_libraries(runUnitTests_c PRIVATE GTest::gtest_main ${LIB_DEEPMD_C}
                                              coverage_config)
 target_link_libraries(runUnitTests_c PRIVATE ${LIB_DEEPMD} ${LIB_DEEPMD_CC})
 
+if(ENABLE_TENSORFLOW)
+  target_compile_definitions(runUnitTests_c PRIVATE BUILD_TENSORFLOW)
+endif()
+if(ENABLE_PYTORCH)
+  target_compile_definitions(runUnitTests_c PRIVATE BUILD_PYTORCH)
+endif()
+if(ENABLE_JAX)
+  target_compile_definitions(runUnitTests_c PRIVATE BUILD_JAX)
+endif()
+if(ENABLE_PADDLE)
+  target_compile_definitions(runUnitTests_c PRIVATE BUILD_PADDLE)
+endif()
+
 if(UNIX AND NOT APPLE)
   find_library(RT_LIBRARY rt)
   if(RT_LIBRARY)
diff --git a/source/api_c/tests/test_deepdipole_hpp.cc b/source/api_c/tests/test_deepdipole_hpp.cc
index f781c34c5b..c4a2b75d09 100644
--- a/source/api_c/tests/test_deepdipole_hpp.cc
+++ b/source/api_c/tests/test_deepdipole_hpp.cc
@@ -26,6 +26,9 @@ class TestInferDeepDipole : public ::testing::Test {
   deepmd::hpp::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole.pbtxt",
                                      "deepdipole.pb");
 
@@ -221,6 +224,9 @@ class TestInferDeepDipoleNew : public ::testing::Test {
   deepmd::hpp::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deepdipole_new.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole_new.pbtxt",
                                      "deepdipole_new.pb");
@@ -434,6 +440,9 @@ class TestInferDeepDipoleFake : public ::testing::Test {
   deepmd::hpp::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole_fake.pbtxt",
                                      "deepdipole_fake.pb");
 
diff --git a/source/api_c/tests/test_deeppolar_hpp.cc b/source/api_c/tests/test_deeppolar_hpp.cc
index 63ebf5d760..5e68b23bb4 100644
--- a/source/api_c/tests/test_deeppolar_hpp.cc
+++ b/source/api_c/tests/test_deeppolar_hpp.cc
@@ -32,6 +32,9 @@ class TestInferDeepPolar : public ::testing::Test {
   deepmd::hpp::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppolar.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppolar.pbtxt",
                                      "deeppolar.pb");
@@ -453,6 +456,9 @@ class TestInferDeepPolarNew : public ::testing::Test {
   deepmd::hpp::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppolar_new.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppolar_new.pbtxt",
                                      "deeppolar_new.pb");
diff --git a/source/api_c/tests/test_deeppot_a.cc b/source/api_c/tests/test_deeppot_a.cc
index b4a9a81f92..d5e2d42bf8 100644
--- a/source/api_c/tests/test_deeppot_a.cc
+++ b/source/api_c/tests/test_deeppot_a.cc
@@ -61,9 +61,12 @@ class TestInferDeepPotA : public ::testing::Test {
   double expected_tot_e;
   std::vector<double> expected_tot_v;
 
-  DP_DeepPot* dp;
+  DP_DeepPot* dp = nullptr;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     const char* file_name = "../../tests/infer/deeppot.pbtxt";
     const char* model_file = "deeppot.pb";
     DP_ConvertPbtxtToPb(file_name, model_file);
@@ -242,9 +245,12 @@ class TestInferDeepPotANoPBC : public ::testing::Test {
   double expected_tot_e;
   std::vector<double> expected_tot_v;
 
-  DP_DeepPot* dp;
+  DP_DeepPot* dp = nullptr;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     const char* file_name = "../../tests/infer/deeppot.pbtxt";
     const char* model_file = "deeppot.pb";
     DP_ConvertPbtxtToPb(file_name, model_file);
diff --git a/source/api_c/tests/test_deeppot_a_fparam_aparam.cc b/source/api_c/tests/test_deeppot_a_fparam_aparam.cc
index a728ede22d..635b47fe51 100644
--- a/source/api_c/tests/test_deeppot_a_fparam_aparam.cc
+++ b/source/api_c/tests/test_deeppot_a_fparam_aparam.cc
@@ -72,6 +72,9 @@ class TestInferDeepPotAFParamAParam : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/fparam_aparam.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb(file_name, "fparam_aparam.pb");
 
@@ -394,6 +397,9 @@ class TestInferAParamNAll : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/pairwise_dprc.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb(file_name, "pairwise_dprc.pb");
     dp.init("pairwise_dprc.pb");
diff --git a/source/api_c/tests/test_deeppot_a_fparam_aparam_nframes.cc b/source/api_c/tests/test_deeppot_a_fparam_aparam_nframes.cc
index 383c8f5fb1..2279ad2f66 100644
--- a/source/api_c/tests/test_deeppot_a_fparam_aparam_nframes.cc
+++ b/source/api_c/tests/test_deeppot_a_fparam_aparam_nframes.cc
@@ -116,6 +116,9 @@ class TestInferDeepPotAFparamAparamNFrames : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/fparam_aparam.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/fparam_aparam.pbtxt",
                                      "fparam_aparam.pb");
@@ -757,6 +760,9 @@ class TestInferDeepPotAFparamAparamNFramesSingleParam : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/fparam_aparam.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/fparam_aparam.pbtxt",
                                      "fparam_aparam.pb");
diff --git a/source/api_c/tests/test_deeppot_a_hpp.cc b/source/api_c/tests/test_deeppot_a_hpp.cc
index 388b53fdfd..3adb47bdf9 100644
--- a/source/api_c/tests/test_deeppot_a_hpp.cc
+++ b/source/api_c/tests/test_deeppot_a_hpp.cc
@@ -66,6 +66,9 @@ class TestInferDeepPotAHPP : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
                                      "deeppot.pb");
@@ -557,6 +560,9 @@ class TestInferDeepPotANoPbcHPP : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb(file_name, "deeppot.pb");
 
diff --git a/source/api_c/tests/test_deeppot_a_nframes_hpp.cc b/source/api_c/tests/test_deeppot_a_nframes_hpp.cc
index af132c0146..bee0546bfc 100644
--- a/source/api_c/tests/test_deeppot_a_nframes_hpp.cc
+++ b/source/api_c/tests/test_deeppot_a_nframes_hpp.cc
@@ -109,6 +109,9 @@ class TestInferDeepPotANFrames : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
                                      "deeppot.pb");
@@ -719,6 +722,9 @@ class TestInferDeepPotANFramesNoPbc : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb(file_name, "deeppot.pb");
 
diff --git a/source/api_c/tests/test_deeppot_model_devi_fparam_aparam.cc b/source/api_c/tests/test_deeppot_model_devi_fparam_aparam.cc
index 5e77529021..550259b42d 100644
--- a/source/api_c/tests/test_deeppot_model_devi_fparam_aparam.cc
+++ b/source/api_c/tests/test_deeppot_model_devi_fparam_aparam.cc
@@ -30,6 +30,9 @@ class TestInferDeepPotModeDeviFparamAparam : public ::testing::Test {
   deepmd::hpp::DeepPotModelDevi dp_md;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     {
       std::string file_name = "../../tests/infer/fparam_aparam.pbtxt";
       deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/fparam_aparam.pbtxt",
diff --git a/source/api_c/tests/test_deeppot_model_devi_hpp.cc b/source/api_c/tests/test_deeppot_model_devi_hpp.cc
index 5751c4f69c..2a8242f99f 100644
--- a/source/api_c/tests/test_deeppot_model_devi_hpp.cc
+++ b/source/api_c/tests/test_deeppot_model_devi_hpp.cc
@@ -24,6 +24,9 @@ class TestInferDeepPotModeDevi : public ::testing::Test {
   deepmd::hpp::DeepPotModelDevi dp_md;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     {
       std::string file_name = "../../tests/infer/deeppot.pbtxt";
       deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
@@ -74,6 +77,9 @@ class TestInferDeepPotModeDeviPython : public ::testing::Test {
   deepmd::hpp::DeepPotModelDevi dp_md;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     {
       std::string file_name = "../../tests/infer/deeppot.pbtxt";
       deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
diff --git a/source/api_c/tests/test_deepspin_a.cc b/source/api_c/tests/test_deepspin_a.cc
index ccffd7221f..8e3236b776 100644
--- a/source/api_c/tests/test_deepspin_a.cc
+++ b/source/api_c/tests/test_deepspin_a.cc
@@ -53,9 +53,12 @@ class TestInferDeepSpinA : public ::testing::Test {
   double expected_tot_e;
   // std::vector<double> expected_tot_v;
 
-  DP_DeepSpin* dp;
+  DP_DeepSpin* dp = nullptr;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp = DP_NewDeepSpin("../../tests/infer/deeppot_dpa_spin.pth");
 
     natoms = expected_e.size();
@@ -233,9 +236,12 @@ class TestInferDeepSpinANoPBC : public ::testing::Test {
   double expected_tot_e;
   // std::vector<double> expected_tot_v;
 
-  DP_DeepSpin* dp;
+  DP_DeepSpin* dp = nullptr;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp = DP_NewDeepSpin("../../tests/infer/deeppot_dpa_spin.pth");
 
     natoms = expected_e.size();
diff --git a/source/api_c/tests/test_deepspin_a_hpp.cc b/source/api_c/tests/test_deepspin_a_hpp.cc
index 701d517690..c65a89a45f 100644
--- a/source/api_c/tests/test_deepspin_a_hpp.cc
+++ b/source/api_c/tests/test_deepspin_a_hpp.cc
@@ -56,6 +56,9 @@ class TestInferDeepSpinAHPP : public ::testing::Test {
   deepmd::hpp::DeepSpin dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp.init("../../tests/infer/deeppot_dpa_spin.pth");
 
     natoms = expected_e.size();
@@ -208,6 +211,9 @@ class TestInferDeepSpinANoPbcHPP : public ::testing::Test {
   deepmd::hpp::DeepSpin dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp.init("../../tests/infer/deeppot_dpa_spin.pth");
 
     natoms = expected_e.size();
diff --git a/source/api_c/tests/test_deepspin_model_devi_hpp.cc b/source/api_c/tests/test_deepspin_model_devi_hpp.cc
index c6bbdef8a8..19acccc6f7 100644
--- a/source/api_c/tests/test_deepspin_model_devi_hpp.cc
+++ b/source/api_c/tests/test_deepspin_model_devi_hpp.cc
@@ -26,6 +26,9 @@ class TestInferDeepSpinModeDevi : public ::testing::Test {
   deepmd::hpp::DeepSpinModelDevi dp_md;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     {
       dp0.init("../../tests/infer/deeppot_dpa_spin.pth");
     }
diff --git a/source/api_c/tests/test_dipolecharge.cc b/source/api_c/tests/test_dipolecharge.cc
index 03e9e44b34..647fa5c3a5 100644
--- a/source/api_c/tests/test_dipolecharge.cc
+++ b/source/api_c/tests/test_dipolecharge.cc
@@ -54,6 +54,9 @@ class TestDipoleCharge : public ::testing::Test {
   deepmd::hpp::DipoleChargeModifier dm;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/dipolecharge_e.pbtxt";
     std::string model = "dipolecharge_e.pb";
     deepmd::hpp::convert_pbtxt_to_pb(file_name, model);
diff --git a/source/api_c/tests/test_mixed_type.cc b/source/api_c/tests/test_mixed_type.cc
index 13f9edbad9..84280f4131 100644
--- a/source/api_c/tests/test_mixed_type.cc
+++ b/source/api_c/tests/test_mixed_type.cc
@@ -29,6 +29,9 @@ class TestInferMixedType : public ::testing::Test {
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/virtual_type.pbtxt";
     deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/virtual_type.pbtxt",
                                      "virtual_type.pb");
diff --git a/source/api_c/tests/test_read_file_to_string.cc b/source/api_c/tests/test_read_file_to_string.cc
index cd26e95928..bef2a28da0 100644
--- a/source/api_c/tests/test_read_file_to_string.cc
+++ b/source/api_c/tests/test_read_file_to_string.cc
@@ -13,6 +13,9 @@
 
 #include "deepmd.hpp"
 TEST(TestReadFileToString, readfiletostring) {
+#ifndef BUILD_TENSORFLOW
+  GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
   std::string file_content;
   deepmd::hpp::read_file_to_string("../../tests/infer/deeppot.txt",
                                    file_content);
@@ -26,6 +29,9 @@ TEST(TestReadFileToString, readfiletostring) {
 }
 
 TEST(TestReadFileToString, readfiletostringerr) {
+#ifndef BUILD_TENSORFLOW
+  GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
   std::string file_content;
   EXPECT_THROW(
       {
diff --git a/source/api_cc/tests/CMakeLists.txt b/source/api_cc/tests/CMakeLists.txt
index 5599b63243..8ec3361d3a 100644
--- a/source/api_cc/tests/CMakeLists.txt
+++ b/source/api_cc/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9)
+cmake_minimum_required(VERSION 3.25.2)
 project(deepmd_api_test)
 
 file(GLOB TEST_SRC test_*.cc)
@@ -6,6 +6,18 @@ file(GLOB TEST_SRC test_*.cc)
 add_executable(runUnitTests_cc ${TEST_SRC})
 target_link_libraries(runUnitTests_cc GTest::gtest_main ${LIB_DEEPMD_CC}
                       coverage_config)
+if(ENABLE_TENSORFLOW)
+  target_compile_definitions(runUnitTests_cc PRIVATE BUILD_TENSORFLOW)
+endif()
+if(ENABLE_PYTORCH)
+  target_compile_definitions(runUnitTests_cc PRIVATE BUILD_PYTORCH)
+endif()
+if(ENABLE_JAX)
+  target_compile_definitions(runUnitTests_cc PRIVATE BUILD_JAX)
+endif()
+if(ENABLE_PADDLE)
+  target_compile_definitions(runUnitTests_cc PRIVATE BUILD_PADDLE)
+endif()
 
 if(UNIX AND NOT APPLE)
   find_library(RT_LIBRARY rt)
diff --git a/source/api_cc/tests/test_deepdipole.cc b/source/api_cc/tests/test_deepdipole.cc
index 86a8a4131f..4c5ec345a6 100644
--- a/source/api_cc/tests/test_deepdipole.cc
+++ b/source/api_cc/tests/test_deepdipole.cc
@@ -30,6 +30,9 @@ class TestInferDeepDipole : public ::testing::Test {
   deepmd::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole.pbtxt",
                                 "deepdipole.pb");
 
@@ -225,6 +228,9 @@ class TestInferDeepDipoleNew : public ::testing::Test {
   deepmd::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deepdipole_new.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole_new.pbtxt",
                                 "deepdipole_new.pb");
@@ -438,6 +444,9 @@ class TestInferDeepDipoleFake : public ::testing::Test {
   deepmd::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole_fake.pbtxt",
                                 "deepdipole_fake.pb");
 
diff --git a/source/api_cc/tests/test_deepdipole_pt.cc b/source/api_cc/tests/test_deepdipole_pt.cc
index 70e46dd9e9..473f697188 100644
--- a/source/api_cc/tests/test_deepdipole_pt.cc
+++ b/source/api_cc/tests/test_deepdipole_pt.cc
@@ -53,6 +53,9 @@ class TestInferDeepTensorPt : public ::testing::Test {
   deepmd::DeepTensor dt;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deepdipole_pt.pth";
     dt.init(file_name);
   };
diff --git a/source/api_cc/tests/test_deepmd_exception.cc b/source/api_cc/tests/test_deepmd_exception.cc
index c28c0f0069..d150004af1 100644
--- a/source/api_cc/tests/test_deepmd_exception.cc
+++ b/source/api_cc/tests/test_deepmd_exception.cc
@@ -24,25 +24,40 @@ TEST(TestDeepmdException, deepmdexception) {
 }
 
 TEST(TestDeepmdException, deepmdexception_nofile_deeppot) {
+#ifndef BUILD_TENSORFLOW
+  GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
   ASSERT_THROW(deepmd::DeepPot("_no_such_file.pb"), deepmd::deepmd_exception);
 }
 
 TEST(TestDeepmdException, deepmdexception_nofile_deeppot_pt) {
+#ifndef BUILD_PYTORCH
+  GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
   ASSERT_THROW(deepmd::DeepPot("_no_such_file.pth"), deepmd::deepmd_exception);
 }
 
 TEST(TestDeepmdException, deepmdexception_nofile_deeppotmodeldevi) {
+#ifndef BUILD_TENSORFLOW
+  GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
   ASSERT_THROW(
       deepmd::DeepPotModelDevi({"_no_such_file.pb", "_no_such_file.pb"}),
       deepmd::deepmd_exception);
 }
 
 TEST(TestDeepmdException, deepmdexception_nofile_deeptensor) {
+#ifndef BUILD_TENSORFLOW
+  GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
   ASSERT_THROW(deepmd::DeepTensor("_no_such_file.pb"),
                deepmd::deepmd_exception);
 }
 
 TEST(TestDeepmdException, deepmdexception_nofile_dipolechargemodifier) {
+#ifndef BUILD_TENSORFLOW
+  GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
   ASSERT_THROW(deepmd::DipoleChargeModifier("_no_such_file.pb"),
                deepmd::deepmd_exception);
 }
diff --git a/source/api_cc/tests/test_deeppolar.cc b/source/api_cc/tests/test_deeppolar.cc
index 89014fd245..dbeb61db9f 100644
--- a/source/api_cc/tests/test_deeppolar.cc
+++ b/source/api_cc/tests/test_deeppolar.cc
@@ -36,6 +36,9 @@ class TestInferDeepPolar : public ::testing::Test {
   deepmd::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppolar.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppolar.pbtxt",
                                 "deeppolar.pb");
@@ -457,6 +460,9 @@ class TestInferDeepPolarNew : public ::testing::Test {
   deepmd::DeepTensor dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppolar_new.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppolar_new.pbtxt",
                                 "deeppolar_new.pb");
diff --git a/source/api_cc/tests/test_deeppot_a.cc b/source/api_cc/tests/test_deeppot_a.cc
index ae1384e8a3..ffd2a25a5d 100644
--- a/source/api_cc/tests/test_deeppot_a.cc
+++ b/source/api_cc/tests/test_deeppot_a.cc
@@ -70,6 +70,9 @@ class TestInferDeepPotA : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
                                 "deeppot.pb");
@@ -607,6 +610,9 @@ class TestInferDeepPotANoPbc : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
     deepmd::convert_pbtxt_to_pb(file_name, "deeppot.pb");
 
diff --git a/source/api_cc/tests/test_deeppot_a_fparam_aparam.cc b/source/api_cc/tests/test_deeppot_a_fparam_aparam.cc
index 89e83603a3..93efee28fe 100644
--- a/source/api_cc/tests/test_deeppot_a_fparam_aparam.cc
+++ b/source/api_cc/tests/test_deeppot_a_fparam_aparam.cc
@@ -73,6 +73,9 @@ class TestInferDeepPotAFParamAParam : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/fparam_aparam.pbtxt";
     deepmd::convert_pbtxt_to_pb(file_name, "fparam_aparam.pb");
 
diff --git a/source/api_cc/tests/test_deeppot_a_fparam_aparam_nframes.cc b/source/api_cc/tests/test_deeppot_a_fparam_aparam_nframes.cc
index 0851523814..e33437805c 100644
--- a/source/api_cc/tests/test_deeppot_a_fparam_aparam_nframes.cc
+++ b/source/api_cc/tests/test_deeppot_a_fparam_aparam_nframes.cc
@@ -117,6 +117,9 @@ class TestInferDeepPotAFparamAparamNFrames : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/fparam_aparam.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/fparam_aparam.pbtxt",
                                 "fparam_aparam.pb");
@@ -758,6 +761,9 @@ class TestInferDeepPotAFparamAparamNFramesSingleParam : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/fparam_aparam.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/fparam_aparam.pbtxt",
                                 "fparam_aparam.pb");
diff --git a/source/api_cc/tests/test_deeppot_a_fparam_aparam_pt.cc b/source/api_cc/tests/test_deeppot_a_fparam_aparam_pt.cc
index e394f86b2a..bc7661f671 100644
--- a/source/api_cc/tests/test_deeppot_a_fparam_aparam_pt.cc
+++ b/source/api_cc/tests/test_deeppot_a_fparam_aparam_pt.cc
@@ -77,6 +77,9 @@ class TestInferDeepPotAFParamAParamPt : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp.init("../../tests/infer/fparam_aparam.pth");
 
     natoms = expected_e.size();
diff --git a/source/api_cc/tests/test_deeppot_a_nframes.cc b/source/api_cc/tests/test_deeppot_a_nframes.cc
index c83a7a0b41..1e7c160437 100644
--- a/source/api_cc/tests/test_deeppot_a_nframes.cc
+++ b/source/api_cc/tests/test_deeppot_a_nframes.cc
@@ -113,6 +113,9 @@ class TestInferDeepPotANFrames : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
                                 "deeppot.pb");
@@ -723,6 +726,9 @@ class TestInferDeepPotANFramesNoPbc : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
     deepmd::convert_pbtxt_to_pb(file_name, "deeppot.pb");
 
diff --git a/source/api_cc/tests/test_deeppot_dpa_jax.cc b/source/api_cc/tests/test_deeppot_dpa_jax.cc
index bd7f1ee7af..2e857450b7 100644
--- a/source/api_cc/tests/test_deeppot_dpa_jax.cc
+++ b/source/api_cc/tests/test_deeppot_dpa_jax.cc
@@ -77,6 +77,9 @@ class TestInferDeepPotDpaJAX : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_JAX
+    GTEST_SKIP() << "Skip because JAX support is not enabled.";
+#endif
     dp.init("../../tests/infer/deeppot_dpa.savedmodel");
 
     natoms = expected_e.size();
@@ -224,6 +227,9 @@ class TestInferDeepPotDpaJAXNopbc : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_JAX
+    GTEST_SKIP() << "Skip because JAX support is not enabled.";
+#endif
     dp.init("../../tests/infer/deeppot_dpa.savedmodel");
 
     natoms = expected_e.size();
diff --git a/source/api_cc/tests/test_deeppot_dpa_pt.cc b/source/api_cc/tests/test_deeppot_dpa_pt.cc
index 7bf4cbf376..0de83278a9 100644
--- a/source/api_cc/tests/test_deeppot_dpa_pt.cc
+++ b/source/api_cc/tests/test_deeppot_dpa_pt.cc
@@ -77,6 +77,9 @@ class TestInferDeepPotDpaPt : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp.init("../../tests/infer/deeppot_dpa.pth");
 
     natoms = expected_e.size();
@@ -224,6 +227,9 @@ class TestInferDeepPotDpaPtNopbc : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp.init("../../tests/infer/deeppot_dpa.pth");
 
     natoms = expected_e.size();
diff --git a/source/api_cc/tests/test_deeppot_dpa_pt_spin.cc b/source/api_cc/tests/test_deeppot_dpa_pt_spin.cc
index f14aa7a52e..8b569dd707 100644
--- a/source/api_cc/tests/test_deeppot_dpa_pt_spin.cc
+++ b/source/api_cc/tests/test_deeppot_dpa_pt_spin.cc
@@ -85,6 +85,9 @@ class TestInferDeepSpinDpaPt : public ::testing::Test {
   deepmd::DeepSpin dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp.init("../../tests/infer/deeppot_dpa_spin.pth");
 
     natoms = expected_e.size();
@@ -248,6 +251,9 @@ class TestInferDeepSpinDpaPtNopbc : public ::testing::Test {
   deepmd::DeepSpin dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     dp.init("../../tests/infer/deeppot_dpa_spin.pth");
 
     natoms = expected_e.size();
diff --git a/source/api_cc/tests/test_deeppot_jax.cc b/source/api_cc/tests/test_deeppot_jax.cc
index 439a271015..80ddf00860 100644
--- a/source/api_cc/tests/test_deeppot_jax.cc
+++ b/source/api_cc/tests/test_deeppot_jax.cc
@@ -69,6 +69,9 @@ class TestInferDeepPotAJAX : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_JAX
+    GTEST_SKIP() << "Skip because JAX support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot_sea.savedmodel";
 
     dp.init(file_name);
diff --git a/source/api_cc/tests/test_deeppot_model_devi.cc b/source/api_cc/tests/test_deeppot_model_devi.cc
index 5ce4b11c7f..ea89847935 100644
--- a/source/api_cc/tests/test_deeppot_model_devi.cc
+++ b/source/api_cc/tests/test_deeppot_model_devi.cc
@@ -28,6 +28,9 @@ class TestInferDeepPotModeDevi : public ::testing::Test {
   deepmd::DeepPotModelDevi dp_md;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     {
       std::string file_name = "../../tests/infer/deeppot.pbtxt";
       deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
@@ -78,6 +81,9 @@ class TestInferDeepPotModeDeviPython : public ::testing::Test {
   deepmd::DeepPotModelDevi dp_md;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     {
       std::string file_name = "../../tests/infer/deeppot.pbtxt";
       deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
diff --git a/source/api_cc/tests/test_deeppot_model_devi_fparam_aparam.cc b/source/api_cc/tests/test_deeppot_model_devi_fparam_aparam.cc
index fb1797961d..3c49a34db5 100644
--- a/source/api_cc/tests/test_deeppot_model_devi_fparam_aparam.cc
+++ b/source/api_cc/tests/test_deeppot_model_devi_fparam_aparam.cc
@@ -31,6 +31,9 @@ class TestInferDeepPotModeDeviFparamAparam : public ::testing::Test {
   deepmd::DeepPotModelDevi dp_md;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     {
       std::string file_name = "../../tests/infer/fparam_aparam.pbtxt";
       deepmd::convert_pbtxt_to_pb("../../tests/infer/fparam_aparam.pbtxt",
diff --git a/source/api_cc/tests/test_deeppot_pd.cc b/source/api_cc/tests/test_deeppot_pd.cc
index f39aa84640..6da9ee643b 100644
--- a/source/api_cc/tests/test_deeppot_pd.cc
+++ b/source/api_cc/tests/test_deeppot_pd.cc
@@ -61,6 +61,9 @@ class TestInferDeepPotAPd : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_PADDLE
+    GTEST_SKIP() << "Skip because Paddle support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot_sea.json";
 
     dp.init(file_name);
@@ -578,6 +581,9 @@ class TestInferDeepPotAPdNoPbc : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_PADDLE
+    GTEST_SKIP() << "Skip because Paddle support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot_sea.json";
     dp.init(file_name);
 
diff --git a/source/api_cc/tests/test_deeppot_pt.cc b/source/api_cc/tests/test_deeppot_pt.cc
index af5b533a5f..144ee8da8c 100644
--- a/source/api_cc/tests/test_deeppot_pt.cc
+++ b/source/api_cc/tests/test_deeppot_pt.cc
@@ -54,6 +54,9 @@ class TestInferDeepPotAPt : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot_sea.pth";
 
     dp.init(file_name);
@@ -570,6 +573,9 @@ class TestInferDeepPotAPtNoPbc : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot_sea.pth";
     dp.init(file_name);
 
diff --git a/source/api_cc/tests/test_deeppot_r.cc b/source/api_cc/tests/test_deeppot_r.cc
index 86fa4b091f..99c5eca6e5 100644
--- a/source/api_cc/tests/test_deeppot_r.cc
+++ b/source/api_cc/tests/test_deeppot_r.cc
@@ -70,6 +70,9 @@ class TestInferDeepPotR : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot-r.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-r.pbtxt",
                                 "deeppot.pb");
@@ -539,6 +542,9 @@ class TestInferDeepPotRNoPbc : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deeppot-r.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-r.pbtxt",
                                 "deeppot.pb");
diff --git a/source/api_cc/tests/test_deeppot_tf_spin.cc b/source/api_cc/tests/test_deeppot_tf_spin.cc
index d03a9b0b57..c7623d8faf 100644
--- a/source/api_cc/tests/test_deeppot_tf_spin.cc
+++ b/source/api_cc/tests/test_deeppot_tf_spin.cc
@@ -41,6 +41,9 @@ class TestInferDeepSpin : public ::testing::Test {
   deepmd::DeepSpin dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deepspin_nlist.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deepspin_nlist.pbtxt",
                                 "deepspin_nlist.pb");
@@ -168,6 +171,9 @@ class TestInferDeepSpinNopbc : public ::testing::Test {
   deepmd::DeepSpin dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/deepspin_nlist.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/deepspin_nlist.pbtxt",
                                 "deepspin_nlist.pb");
diff --git a/source/api_cc/tests/test_deepspin_model_devi.cc b/source/api_cc/tests/test_deepspin_model_devi.cc
index fcc4a4315d..56104a4f99 100644
--- a/source/api_cc/tests/test_deepspin_model_devi.cc
+++ b/source/api_cc/tests/test_deepspin_model_devi.cc
@@ -30,6 +30,9 @@ class TestInferDeepSpinModeDevi : public ::testing::Test {
   deepmd::DeepSpinModelDevi dp_md;
 
   void SetUp() override {
+#ifndef BUILD_PYTORCH
+    GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
+#endif
     {
       dp0.init("../../tests/infer/deeppot_dpa_spin.pth");
     }
diff --git a/source/api_cc/tests/test_dipolecharge.cc b/source/api_cc/tests/test_dipolecharge.cc
index 1d92215c5c..1a33d5ed1c 100644
--- a/source/api_cc/tests/test_dipolecharge.cc
+++ b/source/api_cc/tests/test_dipolecharge.cc
@@ -59,6 +59,9 @@ class TestDipoleCharge : public ::testing::Test {
   deepmd::DipoleChargeModifier dm;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/dipolecharge_e.pbtxt";
     std::string model = "dipolecharge_e.pb";
     deepmd::convert_pbtxt_to_pb(file_name, model);
diff --git a/source/api_cc/tests/test_mixed_type.cc b/source/api_cc/tests/test_mixed_type.cc
index b11bd5123a..0a577e21f2 100644
--- a/source/api_cc/tests/test_mixed_type.cc
+++ b/source/api_cc/tests/test_mixed_type.cc
@@ -30,6 +30,9 @@ class TestInferMixedType : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/virtual_type.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/virtual_type.pbtxt",
                                 "virtual_type.pb");
@@ -137,6 +140,9 @@ class TestInferVirtualType : public ::testing::Test {
   deepmd::DeepPot dp;
 
   void SetUp() override {
+#ifndef BUILD_TENSORFLOW
+    GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
     std::string file_name = "../../tests/infer/virtual_type.pbtxt";
     deepmd::convert_pbtxt_to_pb("../../tests/infer/virtual_type.pbtxt",
                                 "virtual_type.pb");
diff --git a/source/api_cc/tests/test_read_file_to_string.cc b/source/api_cc/tests/test_read_file_to_string.cc
index aaa7e6a83f..dd3249d771 100644
--- a/source/api_cc/tests/test_read_file_to_string.cc
+++ b/source/api_cc/tests/test_read_file_to_string.cc
@@ -13,6 +13,9 @@
 
 #include "common.h"
 TEST(TestReadFileToString, readfiletostring) {
+#ifndef BUILD_TENSORFLOW
+  GTEST_SKIP() << "Skip because TensorFlow support is not enabled.";
+#endif
   std::string file_content;
   deepmd::read_file_to_string("../../tests/infer/deeppot.txt", file_content);
 
diff --git a/source/install/build_cc.sh b/source/install/build_cc.sh
index 7f21b83eee..56c50e92f5 100755
--- a/source/install/build_cc.sh
+++ b/source/install/build_cc.sh
@@ -26,7 +26,7 @@ cmake -D ENABLE_TENSORFLOW=ON \
 	-D USE_TF_PYTHON_LIBS=TRUE \
 	-D USE_PT_PYTHON_LIBS=TRUE \
 	${CUDA_ARGS} \
-	-D LAMMPS_VERSION=stable_22Jul2025_update1 \
+	-D LAMMPS_VERSION=stable_22Jul2025_update2 \
 	..
 cmake --build . -j${NPROC}
 cmake --install .
diff --git a/source/install/build_from_c.sh b/source/install/build_from_c.sh
index 7c73b8543b..903d50bb72 100755
--- a/source/install/build_from_c.sh
+++ b/source/install/build_from_c.sh
@@ -13,7 +13,7 @@ NPROC=$(nproc --all)
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DDEEPMD_C_ROOT=${DEEPMD_C_ROOT} -DLAMMPS_VERSION=stable_22Jul2025_update1 ..
+cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DDEEPMD_C_ROOT=${DEEPMD_C_ROOT} -DLAMMPS_VERSION=stable_22Jul2025_update2 ..
 cmake --build . -j${NPROC}
 cmake --install .
 cmake --build . --target=lammps
diff --git a/source/install/build_lammps.sh b/source/install/build_lammps.sh
index 57af2f261a..3cf981c5f2 100755
--- a/source/install/build_lammps.sh
+++ b/source/install/build_lammps.sh
@@ -14,7 +14,7 @@ BUILD_TMP_DIR=${SCRIPT_PATH}/../build_lammps
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
 # download LAMMMPS
-LAMMPS_VERSION=stable_22Jul2025_update1
+LAMMPS_VERSION=stable_22Jul2025_update2
 if [ ! -d "lammps-${LAMMPS_VERSION}" ]; then
 	curl -L -o lammps.tar.gz https://github.com/lammps/lammps/archive/refs/tags/${LAMMPS_VERSION}.tar.gz
 	tar vxzf lammps.tar.gz
diff --git a/source/install/test_cc.sh b/source/install/test_cc.sh
index f45b936d3e..8058edda87 100755
--- a/source/install/test_cc.sh
+++ b/source/install/test_cc.sh
@@ -17,7 +17,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_22Jul2025_update1 ${CUDA_ARGS} ..
+cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_22Jul2025_update2 ${CUDA_ARGS} ..
 cmake --build . -j${NPROC}
 cmake --install .
 ctest --output-on-failure
diff --git a/source/install/test_cc_local.sh b/source/install/test_cc_local.sh
index c34c27fa64..86088bc0b3 100755
--- a/source/install/test_cc_local.sh
+++ b/source/install/test_cc_local.sh
@@ -1,3 +1,4 @@
+#!/bin/bash
 set -ex
 
 if [ "$DP_VARIANT" = "cuda" ]; then
@@ -20,18 +21,20 @@ PADDLE_INFERENCE_DIR=${BUILD_TMP_DIR}/paddle_inference_install_dir
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
 cmake \
-	-D ENABLE_TENSORFLOW=TRUE \
-	-D ENABLE_PYTORCH=TRUE \
-	-D ENABLE_PADDLE=TRUE \
+	-D ENABLE_TENSORFLOW=${ENABLE_TENSORFLOW:-TRUE} \
+	-D ENABLE_PYTORCH=${ENABLE_PYTORCH:-TRUE} \
+	-D ENABLE_PADDLE=${ENABLE_PADDLE:-TRUE} \
 	-D INSTALL_TENSORFLOW=FALSE \
-	-D USE_TF_PYTHON_LIBS=TRUE \
-	-D USE_PT_PYTHON_LIBS=TRUE \
+	-D USE_TF_PYTHON_LIBS=${ENABLE_TENSORFLOW:-TRUE} \
+	-D USE_PT_PYTHON_LIBS=${ENABLE_PYTORCH:-TRUE} \
 	-D CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
 	-D BUILD_TESTING:BOOL=TRUE \
-	-D LAMMPS_VERSION=stable_22Jul2025_update1 \
+	-D LAMMPS_VERSION=stable_22Jul2025_update2 \
 	${CUDA_ARGS} ..
 cmake --build . -j${NPROC}
 cmake --install .
-PADDLE_INFERENCE_DIR=${BUILD_TMP_DIR}/paddle_inference_install_dir
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${PADDLE_INFERENCE_DIR}/third_party/install/onednn/lib:${PADDLE_INFERENCE_DIR}/third_party/install/mklml/lib
+if [ "${ENABLE_PADDLE:-TRUE}" == "TRUE" ]; then
+	PADDLE_INFERENCE_DIR=${BUILD_TMP_DIR}/paddle_inference_install_dir
+	export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${PADDLE_INFERENCE_DIR}/third_party/install/onednn/lib:${PADDLE_INFERENCE_DIR}/third_party/install/mklml/lib
+fi
 ctest --output-on-failure
diff --git a/source/ipi/tests/test_driver.py b/source/ipi/tests/test_driver.py
index 54c632a2b6..9a30c2c8ce 100644
--- a/source/ipi/tests/test_driver.py
+++ b/source/ipi/tests/test_driver.py
@@ -53,6 +53,10 @@ def write_input(self, atoms, **kwargs) -> None:
         atoms.write(self.xyz_file, format="xyz")
 
 
+@unittest.skipIf(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 class TestDPIPI(unittest.TestCase):
     # copy from test_deeppot_a.py
     @classmethod
@@ -253,6 +257,10 @@ def test_normalize_coords(self) -> None:
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
 
 
+@unittest.skipIf(
+    os.environ.get("ENABLE_PYTORCH", "1") != "1",
+    reason="Skip test because PyTorch support is not enabled.",
+)  # NOTE(review): a skip set on base TestDPIPI is inherited here; confirm PT-only runs
 class TestDPIPIPt(TestDPIPI):
     @classmethod
     def setUpClass(cls) -> None:
diff --git a/source/lib/include/tabulate.h b/source/lib/include/tabulate.h
index 47c3062449..ab57ddf6e9 100644
--- a/source/lib/include/tabulate.h
+++ b/source/lib/include/tabulate.h
@@ -111,6 +111,41 @@ void tabulate_fusion_se_r_grad_grad_cpu(FPTYPE* dz_dy,
                                         const int nnei,
                                         const int last_layer_size);
 
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_cpu(FPTYPE* out,  // only non-const buffer
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei_i,  // NOTE(review): em presumably
+                                   const int nnei_j,  // (nloc, nnei_i, nnei_j); confirm
+                                   const int last_layer_size);
+
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_grad_cpu(FPTYPE* dy_dem_x,  // only non-const buffer
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,  // extra input vs. the base variant
+                                        const int nloc,
+                                        const int nnei_i,
+                                        const int nnei_j,
+                                        const int last_layer_size);
+
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_grad_grad_cpu(FPTYPE* dz_dy,  // only non-const buffer
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,  // extra input vs. the base variant
+                                             const int nloc,
+                                             const int nnei_i,
+                                             const int nnei_j,
+                                             const int last_layer_size);
+
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 template <typename FPTYPE>
 void tabulate_fusion_se_a_gpu(FPTYPE* out,
@@ -219,5 +254,40 @@ void tabulate_fusion_se_r_grad_grad_gpu(FPTYPE* dz_dy,
                                         const int nloc,
                                         const int nnei,
                                         const int last_layer_size);
+
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_gpu(FPTYPE* out,  // only non-const buffer
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei_i,  // NOTE(review): em presumably
+                                   const int nnei_j,  // (nloc, nnei_i, nnei_j); confirm
+                                   const int last_layer_size);
+
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_grad_gpu(FPTYPE* dy_dem_x,  // only non-const buffer
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,  // extra input vs. the base variant
+                                        const int nloc,
+                                        const int nnei_i,
+                                        const int nnei_j,
+                                        const int last_layer_size);
+
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_grad_grad_gpu(FPTYPE* dz_dy,  // only non-const buffer
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,  // extra input vs. the base variant
+                                             const int nloc,
+                                             const int nnei_i,
+                                             const int nnei_j,
+                                             const int last_layer_size);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace deepmd
diff --git a/source/lib/src/gpu/CMakeLists.txt b/source/lib/src/gpu/CMakeLists.txt
index 0d176dc320..e51181315c 100644
--- a/source/lib/src/gpu/CMakeLists.txt
+++ b/source/lib/src/gpu/CMakeLists.txt
@@ -1,6 +1,7 @@
 if(USE_CUDA_TOOLKIT)
-  # required cmake version 3.23: CMAKE_CUDA_ARCHITECTURES all
-  cmake_minimum_required(VERSION 3.23)
+  # Requires CMake >= 3.25 for CMAKE_CUDA_ARCHITECTURES=all and the
+  # RUN_OUTPUT_STDOUT_VARIABLE option.
+  cmake_minimum_required(VERSION 3.25.2)
   # project name
   project(deepmd_op_cuda)
   set(GPU_LIB_NAME deepmd_op_cuda)
@@ -25,7 +26,7 @@ if(USE_CUDA_TOOLKIT)
 
   # cub has been included in CUDA Toolkit 11, we do not need to include it any
   # more see https://github.com/NVIDIA/cub
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS "11")
+  if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11")
     include(FetchContent)
     FetchContent_Declare(
       cub_download
@@ -38,10 +39,25 @@ if(USE_CUDA_TOOLKIT)
     endif()
     include_directories(${CUB_SOURCE_ROOT})
   endif()
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS "9")
+  if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "9")
     message(FATAL_ERROR "CUDA version must be >= 9.0")
   endif()
 
+  # NVCC fails to compile with gcc-14 under C++11. Cases in other repos:
+  # https://gitlab.archlinux.org/archlinux/packaging/packages/cuda/-/issues/12
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "14")
+    set_if_higher(CMAKE_CUDA_STANDARD 14)
+  endif()
+
+  # CUDA 13.0+ requires C++17
+  if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
+    set_if_higher(CMAKE_CUDA_STANDARD 17)
+    message(
+      STATUS
+        "CUDA ${CMAKE_CUDA_COMPILER_VERSION} detected, setting C++ standard to 17"
+    )
+  endif()
+
   message(STATUS "NVCC version is " ${CMAKE_CUDA_COMPILER_VERSION})
 
   # arch will be configured by CMAKE_CUDA_ARCHITECTURES
@@ -54,7 +70,7 @@ if(USE_CUDA_TOOLKIT)
 elseif(USE_ROCM_TOOLKIT)
 
   # required cmake version
-  cmake_minimum_required(VERSION 3.21)
+  cmake_minimum_required(VERSION 3.25.2)
   # project name
   project(deepmd_op_rocm)
   enable_language(HIP)
diff --git a/source/lib/src/gpu/tabulate.cu b/source/lib/src/gpu/tabulate.cu
index e0723b81af..72c4c7a4e1 100644
--- a/source/lib/src/gpu/tabulate.cu
+++ b/source/lib/src/gpu/tabulate.cu
@@ -103,6 +103,38 @@ __forceinline__ __device__ void locate_xx_se_t(FPTYPE& xx,
   }
 }
 
+// Same as locate_xx_se_t: maps xx to a table index and an in-interval offset.
+template <typename FPTYPE>
+__forceinline__ __device__ void locate_xx_se_t_tebd(FPTYPE& xx,  // in: value; out: offset
+                                                    int& table_idx,  // out: interval index
+                                                    const FPTYPE& lower,
+                                                    const FPTYPE& upper,
+                                                    const FPTYPE& min,
+                                                    const FPTYPE& max,
+                                                    const FPTYPE& stride0,
+                                                    const FPTYPE& stride1) {
+  if (xx < min) {  // below the table: first interval, zero offset
+    table_idx = 0;
+    xx = (FPTYPE)0.;
+  } else if (xx < lower) {  // [min, lower): intervals of width stride1
+    table_idx = (int)((xx - min) / stride1);
+    xx -= (table_idx * stride1 + min);  // offset from the interval start
+  } else if (xx < upper) {  // [lower, upper): intervals of width stride0
+    int first_stride = int((lower - min) / stride1);  // intervals before lower
+    table_idx = first_stride + (int)((xx - lower) / stride0);
+    xx -= ((table_idx - first_stride) * stride0 + lower);
+  } else if (xx < max) {  // [upper, max): intervals of width stride1 again
+    int first_stride =  // intervals before upper
+        int((lower - min) / stride1) + int((upper - lower) / stride0);
+    table_idx = first_stride + (int)((xx - upper) / stride1);
+    xx -= ((table_idx - first_stride) * stride1 + upper);
+  } else {  // remaining case: last interval of the table, zero offset
+    table_idx = int((lower - min) / stride1) + int((upper - lower) / stride0) +
+                (int)((max - upper) / stride1) - 1;
+    xx = (FPTYPE)0.;
+  }
+}
+
 template <typename FPTYPE>
 __forceinline__ __device__ void locate_xx_se_r(FPTYPE& xx,
                                                int& table_idx,
@@ -599,7 +631,7 @@ __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
   FPTYPE sum = (FPTYPE)0.;
   for (int ii = 0; ii < nnei_i; ii++) {
     int mark_table_idx = -1;
-    for (int jj = 0; ii < nnei_j; jj++) {
+    for (int jj = 0; jj < nnei_j; jj++) {
       FPTYPE xx = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE tmp = xx;
       FPTYPE dz_xx =
@@ -630,6 +662,194 @@ __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
   dz_dy[block_idx * last_layer_size + thread_idx] = sum;
 }
 
+// SE_T_TEBD forward kernel, written as a grid-stride loop over em_x.
+// For each entry i it evaluates the tabulated fifth-order polynomial of every
+// output channel and stores the values in out[i * last_layer_size + idx].
+template <typename FPTYPE, int MTILE, int KTILE>
+__global__ void tabulate_fusion_se_t_tebd_fifth_order_polynomial(
+    FPTYPE* out,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size,
+    const int_64 total_work) {
+  // NOT USED: em: (nfnl, nnei_i, nnei_j)
+  // em_x: (nfnl * nnei_i * nnei_j, 1) flat version of em
+  // total_work = nloc * nnei_i * nnei_j
+  // nnei_i, nnei_j, MTILE and KTILE are not read here either: out is laid out
+  // row-major as (nloc, nnei_i, nnei_j, last_layer_size), so the channels of
+  // the entry with flat index i start at i * last_layer_size and no per-entry
+  // division/modulo is needed to address them.
+  // Grid-stride loop over the total_work entries.
+  const int_64 first = (int_64)blockIdx.x * blockDim.x + threadIdx.x;
+  const int_64 step = (int_64)gridDim.x * blockDim.x;
+  for (int_64 i = first; i < total_work; i += step) {
+    // Read the input value xx for this specific neighbor pair.
+    FPTYPE xx = em_x[i];
+
+    // Determine the table index based on the value of xx; xx is passed by
+    // reference and updated in place by locate_xx_se_t_tebd.
+    int table_idx = 0;
+    locate_xx_se_t_tebd(xx, table_idx, lower, upper, -max, max, stride0,
+                        stride1);
+
+    // Output channels of this entry, addressed in 64-bit arithmetic.
+    FPTYPE* out_row = out + i * last_layer_size;
+
+    // Serially loop through the 'last_layer_size' dimension to calculate all
+    // features.
+    for (int idx = 0; idx < last_layer_size; idx++) {
+      FPTYPE var[6];
+      load_polynomial_params(var, table, table_idx, idx, last_layer_size);
+      // Nested (Horner) form of var[0] + var[1] * xx + ... + var[5] * xx^5.
+      FPTYPE res =
+          var[0] +
+          (var[1] +
+           (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+              xx;
+      // Store the value of channel idx for this entry.
+      out_row[idx] = res;
+    }
+  }
+}
+
+// SE_T_TEBD backward kernel, written as a grid-stride loop over em_x.
+// For each entry i it sums dy * d(polynomial)/d(xx) over all output channels
+// and stores the result in dy_dem_x[i].
+// lower/upper/max/stride0/stride1 are forwarded to locate_xx_se_t_tebd (the
+// lower bound is -max); table is only read through load_polynomial_params.
+template <typename FPTYPE, int MTILE, int KTILE>
+__global__ void tabulate_fusion_se_t_tebd_grad_fifth_order_polynomial(
+    FPTYPE* dy_dem_x,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dy,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size,
+    const int_64 total_work) {
+  // total_work = nloc * nnei_i * nnei_j
+  // em, nnei_i, nnei_j, MTILE and KTILE are not read here: dy is laid out
+  // row-major as (nloc, nnei_i, nnei_j, last_layer_size), so the channels of
+  // the entry with flat index i start at i * last_layer_size and no per-entry
+  // division/modulo is needed to address them.
+  // Grid-stride loop over the total_work entries.
+  const int_64 first = (int_64)blockIdx.x * blockDim.x + threadIdx.x;
+  const int_64 step = (int_64)gridDim.x * blockDim.x;
+  for (int_64 i = first; i < total_work; i += step) {
+    // Determine the table index based on the value of xx; xx is passed by
+    // reference and updated in place by locate_xx_se_t_tebd.
+    FPTYPE xx = em_x[i];
+    int table_idx = 0;
+    locate_xx_se_t_tebd(xx, table_idx, lower, upper, -max, max, stride0,
+                        stride1);
+
+    // Incoming gradients of this entry, addressed in 64-bit arithmetic.
+    const FPTYPE* dy_row = dy + i * last_layer_size;
+
+    // Accumulate the gradient contributions from all features.
+    FPTYPE grad_sum = 0.0;
+    for (int idx = 0; idx < last_layer_size; idx++) {
+      FPTYPE var[6];
+      load_polynomial_params(var, table, table_idx, idx, last_layer_size);
+
+      // Calculate the derivative of the polynomial with respect to xx.
+      FPTYPE dres_dxx =
+          var[1] + ((FPTYPE)2. * var[2] +
+                    ((FPTYPE)3. * var[3] +
+                     ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                        xx) *
+                       xx;
+
+      // Apply the chain rule: dL/dxx = sum over idx [ (dL/d_res_idx) *
+      // (d_res_idx/dxx) ]
+      grad_sum += dy_row[idx] * dres_dxx;
+    }
+
+    // Write the final summed gradient to the output array.
+    dy_dem_x[i] = grad_sum;
+  }
+}
+
+// SE_T_TEBD second-order kernel, written as a grid-stride loop over em_x.
+// For each entry i it writes dz_dy_dem_x[i] * d(polynomial)/d(xx) for every
+// output channel into dz_dy[i * last_layer_size + idx].
+// lower/upper/max/stride0/stride1 are forwarded to locate_xx_se_t_tebd (the
+// lower bound is -max); table is only read through load_polynomial_params.
+template <typename FPTYPE, int MTILE, int KTILE>
+__global__ void tabulate_fusion_se_t_tebd_grad_grad_fifth_order_polynomial(
+    FPTYPE* dz_dy,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dz_dy_dem_x,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size,
+    const int_64 total_work) {
+  // total_work = nloc * nnei_i * nnei_j
+  // em, nnei_i, nnei_j, MTILE and KTILE are not read here: dz_dy is laid out
+  // row-major as (nloc, nnei_i, nnei_j, last_layer_size), so the channels of
+  // the entry with flat index i start at i * last_layer_size and no per-entry
+  // division/modulo is needed to address them.
+  // Grid-stride loop over the total_work entries.
+  const int_64 first = (int_64)blockIdx.x * blockDim.x + threadIdx.x;
+  const int_64 step = (int_64)gridDim.x * blockDim.x;
+  for (int_64 i = first; i < total_work; i += step) {
+    FPTYPE xx = em_x[i];
+
+    // Read the incoming gradient for xx. This value is the same for all 'idx'
+    // features.
+    const FPTYPE dz_dy_dem_x_val = dz_dy_dem_x[i];
+
+    // Determine the table index based on the value of xx; xx is passed by
+    // reference and updated in place by locate_xx_se_t_tebd.
+    int table_idx = 0;
+    locate_xx_se_t_tebd(xx, table_idx, lower, upper, -max, max, stride0,
+                        stride1);
+
+    // Output channels of this entry, addressed in 64-bit arithmetic.
+    FPTYPE* dz_dy_row = dz_dy + i * last_layer_size;
+
+    // Serially loop through the 'last_layer_size' dimension.
+    for (int idx = 0; idx < last_layer_size; idx++) {
+      FPTYPE var[6];
+      load_polynomial_params(var, table, table_idx, idx, last_layer_size);
+
+      // Calculate the derivative of the polynomial with respect to xx.
+      FPTYPE dres_dxx =
+          var[1] + ((FPTYPE)2. * var[2] +
+                    ((FPTYPE)3. * var[3] +
+                     ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                        xx) *
+                       xx;
+
+      // Propagate the incoming value through the polynomial:
+      // dz_dy[idx] = dz_dy_dem_x_val * (d_res_idx/dxx), one value per
+      // channel idx of this entry.
+      dz_dy_row[idx] = dz_dy_dem_x_val * dres_dxx;
+    }
+  }
+}
+
 template <typename FPTYPE, int MTILE, int KTILE>
 __global__ void tabulate_fusion_se_r_fifth_order_polynomial(
     FPTYPE* out,
@@ -923,6 +1143,99 @@ void tabulate_fusion_se_t_grad_grad_gpu(FPTYPE* dz_dy,
   DPErrcheck(gpuDeviceSynchronize());
 }
 
+// SE_T_TEBD kernels - preserve full nt_i x nt_j structure unlike SE_T
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_gpu(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei_i,
+                                   const int nnei_j,
+                                   const int last_layer_size) {
+  // Forward pass: nothing is launched unless all three extents are positive.
+  if (!(nloc > 0 && nnei_i > 0 && nnei_j > 0)) {
+    return;
+  }
+  // One work item per em_x entry; TPB threads per block, grid rounded up.
+  const int_64 n_entries = (int_64)nloc * nnei_i * nnei_j;
+  const int block_size = TPB;
+  const int grid_size = (n_entries + block_size - 1) / block_size;
+
+  DPErrcheck(gpuGetLastError());
+  DPErrcheck(gpuDeviceSynchronize());
+  tabulate_fusion_se_t_tebd_fifth_order_polynomial<FPTYPE, MM, KK>
+      <<<grid_size, block_size>>>(out, table, em_x, em, table_info[0],
+                                  table_info[1], table_info[2], table_info[3],
+                                  table_info[4], nnei_i, nnei_j,
+                                  last_layer_size, n_entries);
+  DPErrcheck(gpuGetLastError());
+  DPErrcheck(gpuDeviceSynchronize());
+}
+
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_grad_gpu(FPTYPE* dy_dem_x,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei_i,
+                                        const int nnei_j,
+                                        const int last_layer_size) {
+  if (!(nloc > 0 && nnei_i > 0 && nnei_j > 0)) {  // empty input: no launch
+    return;
+  }
+  const int_64 n_entries = (int_64)nloc * nnei_i * nnei_j;
+  const int block_size = TPB;
+  const int grid_size = (n_entries + block_size - 1) / block_size;
+  // Backward pass: dy_dem_x (one value per em_x entry) is zeroed, then filled.
+  DPErrcheck(gpuGetLastError());
+  DPErrcheck(gpuDeviceSynchronize());
+  DPErrcheck(gpuMemset(dy_dem_x, 0, sizeof(FPTYPE) * n_entries));
+  tabulate_fusion_se_t_tebd_grad_fifth_order_polynomial<FPTYPE, MM, KK>
+      <<<grid_size, block_size>>>(dy_dem_x, table, em_x, em, dy,
+                                  table_info[0], table_info[1], table_info[2],
+                                  table_info[3], table_info[4], nnei_i,
+                                  nnei_j, last_layer_size, n_entries);
+  DPErrcheck(gpuGetLastError());
+  DPErrcheck(gpuDeviceSynchronize());
+}
+
+template <typename FPTYPE>
+void tabulate_fusion_se_t_tebd_grad_grad_gpu(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const int nloc,
+                                             const int nnei_i,
+                                             const int nnei_j,
+                                             const int last_layer_size) {
+  if (!(nloc > 0 && nnei_i > 0 && nnei_j > 0)) {  // empty input: no launch
+    return;
+  }
+  const int_64 n_entries = (int_64)nloc * nnei_i * nnei_j;
+  const int block_size = TPB;
+  const int grid_size = (n_entries + block_size - 1) / block_size;
+  // dz_dy holds last_layer_size values per em_x entry; zero it first.
+  DPErrcheck(gpuGetLastError());
+  DPErrcheck(gpuDeviceSynchronize());
+  DPErrcheck(
+      gpuMemset(dz_dy, 0, sizeof(FPTYPE) * n_entries * last_layer_size));
+  // One work item per em_x entry; TPB threads per block, grid rounded up.
+  tabulate_fusion_se_t_tebd_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>
+      <<<grid_size, block_size>>>(dz_dy, table, em_x, em, dz_dy_dem_x,
+                                  table_info[0], table_info[1], table_info[2],
+                                  table_info[3], table_info[4], nnei_i,
+                                  nnei_j, last_layer_size, n_entries);
+  DPErrcheck(gpuGetLastError());
+  DPErrcheck(gpuDeviceSynchronize());
+}
+
 template <typename FPTYPE>
 void tabulate_fusion_se_r_gpu(FPTYPE* out,
                               const FPTYPE* table,
@@ -1181,4 +1494,73 @@ template void tabulate_fusion_se_r_grad_grad_gpu<double>(
     const int nnei,
     const int last_layer_size);
 
+// Template instantiations for SE_T_TEBD GPU functions
+template void tabulate_fusion_se_t_tebd_gpu<float>(float* out,
+                                                   const float* table,
+                                                   const float* table_info,
+                                                   const float* em_x,
+                                                   const float* em,
+                                                   const int nloc,
+                                                   const int nnei_i,
+                                                   const int nnei_j,
+                                                   const int last_layer_size);
+
+template void tabulate_fusion_se_t_tebd_gpu<double>(double* out,
+                                                    const double* table,
+                                                    const double* table_info,
+                                                    const double* em_x,
+                                                    const double* em,
+                                                    const int nloc,
+                                                    const int nnei_i,
+                                                    const int nnei_j,
+                                                    const int last_layer_size);
+
+template void tabulate_fusion_se_t_tebd_grad_gpu<float>(
+    float* dy_dem_x,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+
+template void tabulate_fusion_se_t_tebd_grad_gpu<double>(
+    double* dy_dem_x,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+
+template void tabulate_fusion_se_t_tebd_grad_grad_gpu<float>(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dz_dy_dem_x,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+
+template void tabulate_fusion_se_t_tebd_grad_grad_gpu<double>(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dz_dy_dem_x,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+
 }  // namespace deepmd
diff --git a/source/lib/src/tabulate.cc b/source/lib/src/tabulate.cc
index 9352980351..e3b1b770ca 100644
--- a/source/lib/src/tabulate.cc
+++ b/source/lib/src/tabulate.cc
@@ -541,6 +541,156 @@ void deepmd::tabulate_fusion_se_t_grad_grad_cpu(FPTYPE* dz_dy,
   }
 }
 
+// SE_T_TEBD forward pass on CPU: for every em_x entry (ii, jj, kk), evaluates
+// the tabulated fifth-order polynomial of each of the last_layer_size channels
+// and stores it in out[ii][jj][kk][mm]. `em` is accepted but not read.
+template <typename FPTYPE>
+void deepmd::tabulate_fusion_se_t_tebd_cpu(FPTYPE* out,
+                                           const FPTYPE* table,
+                                           const FPTYPE* table_info,
+                                           const FPTYPE* em_x,
+                                           const FPTYPE* em,
+                                           const int nloc,
+                                           const int nnei_i,
+                                           const int nnei_j,
+                                           const int last_layer_size) {
+  memset(out, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j * last_layer_size);
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
+  const FPTYPE stride0 = table_info[3];
+  const FPTYPE stride1 = table_info[4];
+
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ii++) {
+    for (int jj = 0; jj < nnei_i; jj++) {
+      for (int kk = 0; kk < nnei_j; kk++) {
+        // Flat (ii, jj, kk) index as size_t, so that scaling it by
+        // last_layer_size below cannot overflow int on large inputs.
+        const size_t pair_idx =
+            (static_cast<size_t>(ii) * nnei_i + jj) * nnei_j + kk;
+        FPTYPE xx = em_x[pair_idx];
+        int table_idx = 0;
+        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx,
+                       table_idx);
+
+        // For SE_TEBD, we preserve the full nt_i x nt_j x ng structure
+        // instead of reducing it like SE_T does
+        FPTYPE* out_row = out + pair_idx * last_layer_size;
+        for (int mm = 0; mm < last_layer_size; mm++) {
+          // a[0..5]: polynomial coefficients of channel mm in this interval.
+          const FPTYPE* a = table + (table_idx * last_layer_size + mm) * 6;
+          FPTYPE res = a[0] + a[1] * xx + a[2] * xx * xx + a[3] * xx * xx * xx +
+                       a[4] * xx * xx * xx * xx + a[5] * xx * xx * xx * xx * xx;
+          out_row[mm] = res;
+        }
+      }
+    }
+  }
+}
+
+// SE_T_TEBD backward pass on CPU: for every em_x entry (ii, jj, kk), sums
+// dy[ii][jj][kk][mm] * d(polynomial mm)/d(xx) over the last_layer_size channels
+// and stores the sum in dy_dem_x[ii][jj][kk]. `em` is accepted but not read.
+template <typename FPTYPE>
+void deepmd::tabulate_fusion_se_t_tebd_grad_cpu(FPTYPE* dy_dem_x,
+                                                const FPTYPE* table,
+                                                const FPTYPE* table_info,
+                                                const FPTYPE* em_x,
+                                                const FPTYPE* em,
+                                                const FPTYPE* dy,
+                                                const int nloc,
+                                                const int nnei_i,
+                                                const int nnei_j,
+                                                const int last_layer_size) {
+  memset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j);
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
+  const FPTYPE stride0 = table_info[3];
+  const FPTYPE stride1 = table_info[4];
+
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ii++) {
+    for (int jj = 0; jj < nnei_i; jj++) {
+      for (int kk = 0; kk < nnei_j; kk++) {
+        // Flat (ii, jj, kk) index as size_t, so that scaling it by
+        // last_layer_size below cannot overflow int on large inputs.
+        const size_t pair_idx =
+            (static_cast<size_t>(ii) * nnei_i + jj) * nnei_j + kk;
+        FPTYPE xx = em_x[pair_idx];
+        int table_idx = 0;
+        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx,
+                       table_idx);
+
+        // Incoming gradients dy[ii][jj][kk][0 .. last_layer_size).
+        const FPTYPE* dy_row = dy + pair_idx * last_layer_size;
+        FPTYPE grad_sum = 0.0;
+        for (int mm = 0; mm < last_layer_size; mm++) {
+          // a[1..5]: coefficients used by the derivative (a[0] drops out).
+          const FPTYPE* a = table + (table_idx * last_layer_size + mm) * 6;
+          FPTYPE dres_dxx = a[1] + 2.0 * a[2] * xx + 3.0 * a[3] * xx * xx +
+                            4.0 * a[4] * xx * xx * xx +
+                            5.0 * a[5] * xx * xx * xx * xx;
+          grad_sum += dy_row[mm] * dres_dxx;
+        }
+        dy_dem_x[pair_idx] = grad_sum;
+      }
+    }
+  }
+}
+
+// SE_T_TEBD second-order pass on CPU: for every em_x entry (ii, jj, kk), writes
+// dz_dy_dem_x[ii][jj][kk] * d(polynomial mm)/d(xx) into dz_dy[ii][jj][kk][mm]
+// for each of the last_layer_size channels. `em` is accepted but not read.
+template <typename FPTYPE>
+void deepmd::tabulate_fusion_se_t_tebd_grad_grad_cpu(
+    FPTYPE* dz_dy,
+    const FPTYPE* table,
+    const FPTYPE* table_info,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dz_dy_dem_x,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size) {
+  memset(dz_dy, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j * last_layer_size);
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
+  const FPTYPE stride0 = table_info[3];
+  const FPTYPE stride1 = table_info[4];
+
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ii++) {
+    for (int jj = 0; jj < nnei_i; jj++) {
+      for (int kk = 0; kk < nnei_j; kk++) {
+        // Flat (ii, jj, kk) index as size_t, so that scaling it by
+        // last_layer_size below cannot overflow int on large inputs.
+        const size_t pair_idx =
+            (static_cast<size_t>(ii) * nnei_i + jj) * nnei_j + kk;
+        FPTYPE xx = em_x[pair_idx];
+        int table_idx = 0;
+        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx,
+                       table_idx);
+
+        // Incoming value of this entry and the output row it is scaled into.
+        const FPTYPE dz_dy_dem_x_val = dz_dy_dem_x[pair_idx];
+        FPTYPE* dz_dy_row = dz_dy + pair_idx * last_layer_size;
+        for (int mm = 0; mm < last_layer_size; mm++) {
+          // a[1..5]: coefficients used by the derivative (a[0] drops out).
+          const FPTYPE* a = table + (table_idx * last_layer_size + mm) * 6;
+          FPTYPE dres_dxx = a[1] + 2.0 * a[2] * xx + 3.0 * a[3] * xx * xx +
+                            4.0 * a[4] * xx * xx * xx +
+                            5.0 * a[5] * xx * xx * xx * xx;
+          dz_dy_row[mm] = dz_dy_dem_x_val * dres_dxx;
+        }
+      }
+    }
+  }
+}
+
 template <typename FPTYPE>
 void deepmd::tabulate_fusion_se_r_cpu(FPTYPE* out,
                                       const FPTYPE* table,
@@ -810,7 +960,70 @@ template void deepmd::tabulate_fusion_se_t_grad_grad_cpu<double>(
     const int nnei_i,
     const int nnei_j,
     const int last_layer_size);
-
+template void deepmd::tabulate_fusion_se_t_tebd_cpu<float>(
+    float* out,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_tebd_cpu<double>(
+    double* out,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_tebd_grad_cpu<float>(
+    float* dy_dem_x,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_tebd_grad_cpu<double>(
+    double* dy_dem_x,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_tebd_grad_grad_cpu<float>(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dz_dy_dem_x,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_tebd_grad_grad_cpu<double>(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dz_dy_dem_x,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
 template void deepmd::tabulate_fusion_se_r_cpu<float>(
     float* out,
     const float* table,
diff --git a/source/lib/tests/CMakeLists.txt b/source/lib/tests/CMakeLists.txt
index f2fd8969d9..b8fbdcc00e 100644
--- a/source/lib/tests/CMakeLists.txt
+++ b/source/lib/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9)
+cmake_minimum_required(VERSION 3.25.2)
 project(libdeepmd_test)
 
 file(GLOB TEST_SRC test_*.cc)
diff --git a/source/lib/tests/test_tabulate_se_t_tebd.cc b/source/lib/tests/test_tabulate_se_t_tebd.cc
new file mode 100644
index 0000000000..fe7f314119
--- /dev/null
+++ b/source/lib/tests/test_tabulate_se_t_tebd.cc
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include <gtest/gtest.h>
+
+#include <iostream>
+#include <vector>
+
+#include "device.h"
+#include "tabulate.h"
+#include "utilities.h"
+
+class TestTabulateSeTTebd : public ::testing::Test {
+ protected:
+  std::vector<double> table = {
+      -1.0600000163027882e02,  7.7059358807135015e02,   -5.6954714749735385e03,
+      1.2167808756610991e03,   -7.6199102434332218e01,  1.0706136029373441e00,
+      -1.0600000164528124e02,  7.7059358630452323e02,   -5.6954715659539552e03,
+      1.2167808757436076e03,   -7.6199099707724926e01,  1.0706134206080884e00,
+      -1.0600000163027882e02,  7.7059358807135015e02,   -5.6954714749735385e03,
+      1.2167808756610991e03,   -7.6199102434332218e01,  1.0706136029373441e00,
+      -1.0600000164528124e02,  7.7059358630452323e02,   -5.6954715659539552e03,
+      1.2167808757436076e03,   -7.6199099707724926e01,  1.0706134206080884e00,
+      -9.6000006759336443e01,  6.2969719646863621e02,   -4.2053706363664551e03,
+      9.0372155784831205e02,   -5.7600014239472898e01,  8.6528676197113796e-01,
+      -9.6000006828502180e01,  6.2969718981238339e02,   -4.2053709121998018e03,
+      9.0372156236848912e02,   -5.7600006817493266e01,  8.6528625106787871e-01,
+      -9.6000006759336443e01,  6.2969719646863621e02,   -4.2053706363664551e03,
+      9.0372155784831205e02,   -5.7600014239472898e01,  8.6528676197113796e-01,
+      -9.6000006828502180e01,  6.2969718981238339e02,   -4.2053709121998018e03,
+      9.0372156236848912e02,   -5.7600006817493266e01,  8.6528625106787871e-01,
+      -8.6000028021606425e01,  5.0303296429845562e02,   -3.0008648248894533e03,
+      6.4939597734382562e02,   -4.2250984019314707e01,  6.8180015607155764e-01,
+      -8.6000028340480625e01,  5.0303293978396903e02,   -3.0008656209622986e03,
+      6.4939600529391078e02,   -4.2250965541906716e01,  6.8179882734268982e-01,
+      -8.6000028021606425e01,  5.0303296429845562e02,   -3.0008648248894533e03,
+      6.4939597734382562e02,   -4.2250984019314707e01,  6.8180015607155764e-01,
+      -8.6000028340480625e01,  5.0303293978396903e02,   -3.0008656209622986e03,
+      6.4939600529353049e02,   -4.2250965541830588e01,  6.8179882733888086e-01,
+      -7.6000116148038558e01,  3.9060139597613619e02,   -2.0515743554479322e03,
+      4.4772754091167945e02,   -2.9848087537832814e01,  5.2014755686537917e-01,
+      -7.6000117618125429e01,  3.9060130821883052e02,   -2.0515765138621105e03,
+      4.4772766653712006e02,   -2.9848047259266409e01,  5.2014443989116910e-01,
+      -7.6000116148038558e01,  3.9060139597613619e02,   -2.0515743554479322e03,
+      4.4772754091167945e02,   -2.9848087537832814e01,  5.2014755686537917e-01,
+      -7.6000117618125742e01,  3.9060130821877993e02,   -2.0515765138659344e03,
+      4.4772766652483722e02,   -2.9848047256692499e01,  5.2014443976043645e-01,
+      -6.6000481290731443e01,  2.9240425245900917e02,   -1.3271250821434478e03,
+      2.9263955624337893e02,   -2.0087224005740719e01,  3.8031147992206349e-01,
+      -6.6000488067863742e01,  2.9240394960550276e02,   -1.3271304743966571e03,
+      2.9264002765325057e02,   -2.0087154325946980e01,  3.8030522013794582e-01,
+      -6.6000481290731443e01,  2.9240425245900917e02,   -1.3271250821434478e03,
+      2.9263955624337893e02,   -2.0087224005740719e01,  3.8031147992206349e-01,
+      -6.6000488067883694e01,  2.9240394960308691e02,   -1.3271304745319526e03,
+      2.9264002727267626e02,   -2.0087154245656002e01,  3.8030521605011575e-01,
+      -5.6001992867343972e01,  2.0844745574402617e02,   -7.9715799906587699e02,
+      1.7805563184427194e02,   -1.2663929104029080e01,  2.6224978307822894e-01,
+      -5.6002024103130161e01,  2.0844646075692629e02,   -7.9717003898786652e02,
+      1.7805715054974732e02,   -1.2663864677938077e01,  2.6224029170957303e-01,
+      -5.6001992867343972e01,  2.0844745574402617e02,   -7.9715799906587699e02,
+      1.7805563184427194e02,   -1.2663929104029080e01,  2.6224978307822894e-01,
+      -5.6002024104383771e01,  2.0844646064871867e02,   -7.9717004324410516e02,
+      1.7805714044473001e02,   -1.2663862524337585e01,  2.6224018166598279e-01,
+      -4.6008230210744550e01,  1.3874976550319553e02,   -4.3134867537287749e02,
+      9.7902623595157010e01,   -7.2734403121911884e00,  1.6589123996688057e-01,
+      -4.6008373996710617e01,  1.3874671965012058e02,   -4.3137141216256458e02,
+      9.7906861443792735e01,   -7.2735856084076280e00,  1.6588642735924275e-01,
+      -4.6008230210744550e01,  1.3874976550319553e02,   -4.3134867537287749e02,
+      9.7902623595157010e01,   -7.2734403121911884e00,  1.6589123996688057e-01,
+      -4.6008374075307870e01,  1.3874671513440606e02,   -4.3137152784492957e02,
+      9.7906652364871050e01,   -7.2735401377994249e00,  1.6588408717348646e-01,
+      -3.6033642533368131e01,  8.3364086172019398e01,   -1.9942175516407502e02,
+      4.6124022747838069e01,   -3.6130563858549958e00,  9.1249773312287188e-02,
+      -3.6034298111245583e01,  8.3355843868269616e01,   -1.9945266030093268e02,
+      4.6135000705962462e01,   -3.6142786797647353e00,  9.1293932043118198e-02,
+      -3.6033642533368131e01,  8.3364086172019398e01,   -1.9942175516407502e02,
+      4.6124022747838069e01,   -3.6130563858549958e00,  9.1249773312287188e-02,
+      -3.6034302998781108e01,  8.3355675173745269e01,   -1.9945516784358935e02,
+      4.6132303200740992e01,   -3.6136582565667807e00,  9.1261386291659793e-02,
+      -2.6132076703837274e01,  4.2398929436319683e01,   -7.1037171119057973e01,
+      1.3425662262407457e01,   -7.5172495708992593e-01, 7.7522572203268742e-03,
+      -2.6134776894873077e01,  4.2384732735328775e01,   -7.1030526549717337e01,
+      1.3431455085299461e01,   -7.5302028721199155e-01, 7.8186246126207160e-03,
+      -2.6132076703837274e01,  4.2398929436319683e01,   -7.1037171119057973e01,
+      1.3425662262405055e01,   -7.5172495708944420e-01, 7.7522572203027138e-03,
+      -2.6135071381093578e01,  4.2379566840123424e01,   -7.1067162844830236e01,
+      1.3434603316099608e01,   -7.5251233833488806e-01, 7.7734884077347950e-03,
+      -2.2221480705551805e01,  3.0067218434037404e01,   -4.1779705297521097e01,
+      -1.9077757705724110e02,  3.6413466026808294e02,   -1.6067397401486718e02,
+      -2.2225430071703467e01,  3.0060809113889512e01,   -4.1712800191721314e01,
+      -1.9084786311022177e02,  3.6410062714257685e02,   -1.6063028238785057e02,
+      -2.2221480705551830e01,  3.0067218434036263e01,   -4.1779705297545611e01,
+      -1.9077757705723738e02,  3.6413466026815809e02,   -1.6067397401492047e02,
+      -2.2226913938674084e01,  3.0042371820589185e01,   -4.1801582285426832e01,
+      -1.9048619249019526e02,  3.6373874557858261e02,   -1.6052358406417352e02,
+      -2.1250858373060836e01,  2.7343847665267702e01,   -3.6044215009418814e01,
+      -1.7618484800469861e02,  3.3120085405644409e02,   -1.4534825256321494e02,
+      -2.1254939505030809e01,  2.7342716030835884e01,   -3.5955450545431681e01,
+      -1.7635550119316844e02,  3.3127447930769307e02,   -1.4533876561022046e02,
+      -2.1250858373060954e01,  2.7343847665262818e01,   -3.6044215009514119e01,
+      -1.7618484800464822e02,  3.3120085405666612e02,   -1.4534825256338749e02,
+      -2.1257155379297881e01,  2.7317691772612619e01,   -3.6063526926252166e01,
+      -1.7588696592837897e02,  3.3079005662384850e02,   -1.4519086534447842e02,
+      -2.0283472228681301e01,  2.4763027042036295e01,   -3.0876160316998963e01,
+      -1.6184864900381874e02,  2.9976970905591691e02,   -1.3084395423768876e02,
+      -2.0287461515322455e01,  2.4769400540137131e01,   -3.0762734380983186e01,
+      -1.6214886052089241e02,  2.9998995088792128e02,   -1.3088331758129965e02,
+      -2.0283472228681809e01,  2.4763027042017129e01,   -3.0876160317336627e01,
+      -1.6184864900359682e02,  2.9976970905662938e02,   -1.3084395423826805e02,
+      -2.0290765181946348e01,  2.4735639907973120e01,   -3.0892738413082597e01,
+      -1.6154574482310053e02,  2.9934595420013272e02,   -1.3068028494926122e02,
+      -1.9319499689234629e01,  2.2323824431805683e01,   -2.6243395369841849e01,
+      -1.4782286378121026e02,  2.6985759662396487e02,   -1.1715474197881395e02,
+      -1.9323022570439292e01,  2.2340565860680357e01,   -2.6102786429129356e01,
+      -1.4828764857305418e02,  2.7027298759214750e02,   -1.1726163007473576e02,
+      -1.9319499689236839e01,  2.2323824431730525e01,   -2.6243395371031539e01,
+      -1.4782286378021576e02,  2.6985759662609979e02,   -1.1715474198068593e02,
+      -1.9327939259284843e01,  2.2295320666731183e01,   -2.6257097174199931e01,
+      -1.4751677383623073e02,  2.6942341041084092e02,   -1.1698575776762208e02,
+      -1.8359079763330211e01,  2.0025118950280675e01,   -2.2113826757823226e01,
+      -1.3415932552431914e02,  2.4147795894487624e02,   -1.0427314537549884e02,
+      -1.8361534194530734e01,  2.0055847278170305e01,   -2.1944107342764479e01,
+      -1.3482982214648752e02,  2.4214772485703989e02,   -1.0447085300268679e02,
+      -1.8359079763339750e01,  2.0025118949989704e01,   -2.2113826761939308e01,
+      -1.3415932552009582e02,  2.4147795895089951e02,   -1.0427314538136979e02,
+      -1.8368836959765495e01,  1.9995657614892380e01,   -2.2124533894067383e01,
+      -1.3385233293246981e02,  2.4103659293914149e02,   -1.0410011400771683e02,
+      -1.7402299525814517e01,  1.7865597763687486e01,   -1.8455503416511757e01,
+      -1.2090765118569301e02,  2.1464125749038132e02,   -9.2190581022134992e01,
+      -1.7402744551259310e01,  1.7914800567904472e01,   -1.8255754666855470e01,
+      -1.2183089355280822e02,  2.1563582256173194e02,   -9.2507405324257306e01,
+      -1.7402299525855486e01,  1.7865597762572605e01,   -1.8455503430527756e01,
+      -1.2090765116826699e02,  2.1464125750558804e02,   -9.2190581039770791e01,
+      -1.7413567239985614e01,  1.7835392747330133e01,   -1.8463115133795956e01,
+      -1.2060260469703572e02,  2.1419685510959093e02,   -9.2015134441585104e01,
+      -1.6449179896085464e01,  1.5843762224435309e01,   -1.5236722252652665e01,
+      -1.0811515163854509e02,  1.8935506712501905e02,   -8.0897437157402223e01,
+      -1.6446174965543889e01,  1.5916874201410112e01,   -1.5007553197461570e01,
+      -1.0934291295595986e02,  1.9075532567542470e02,   -8.1366596347119696e01,
+      -1.6449179896260411e01,  1.5843762220214204e01,   -1.5236722299508587e01,
+      -1.0811515156878269e02,  1.8935506715588940e02,   -8.0897437207525684e01,
+      -1.6462173655481337e01,  1.5813096619069219e01,   -1.5241142983208677e01,
+      -1.0781563484017332e02,  1.8891289499393798e02,   -8.0721658713418606e01,
+      -1.5499661595231082e01,  1.3957945516559789e01,   -1.2426145992195885e01,
+      -9.5826844741964834e01,  1.6562434781973772e02,   -7.0383233416004117e01,
+      -1.5491037589250178e01,  1.4061349904707843e01,   -1.2170301483989650e01,
+      -9.7412966929875139e01,  1.6751874597575440e02,   -7.1041920384880939e01,
+      -1.5499661595973759e01,  1.3957945500778198e01,   -1.2426146145776961e01,
+      -9.5826844470313858e01,  1.6562434784656404e02,   -7.0383233547510557e01,
+      -1.5514618579274794e01,  1.3927192540790591e01,   -1.2427264674287118e01,
+      -9.5537423121432880e01,  1.6519113036542510e02,   -7.0209783384625098e01,
+      -1.4553592409098401e01,  1.2206343505203831e01,   -9.9929274597052196e00,
+      -8.4085595900823435e01,  1.4345191724964303e02,   -6.0636862050381758e01,
+      -1.4536130507533649e01,  1.2347228125716077e01,   -9.7159302678980044e00,
+      -8.6081002959763751e01,  1.4592996741513730e02,   -6.1523840242331410e01,
+      -1.4553592412232879e01,  1.2206343446986155e01,   -9.9929279524397305e00,
+      -8.4085594870780753e01,  1.4345191706222485e02,   -6.0636862352071532e01,
+      -1.4570766853404239e01,  1.2175998366492486e01,   -9.9905856922863112e00,
+      -8.3812185051328299e01,  1.4303633648493073e02,   -6.0469165577726159e01,
+      -1.3610717065161962e01,  1.0587059629986399e01,   -7.9068321681349163e00,
+      -7.2932404423885004e01,  1.2283913327111270e02,   -5.1646910322317169e01,
+      -1.3579708436673444e01,  1.0773027159520954e01,   -7.6175370796795425e00,
+      -7.5376833196183071e01,  1.2597958225245242e02,   -5.2797863799745748e01,
+      -1.3610717078313911e01,  1.0587059418306087e01,   -7.9068337121483454e00,
+      -7.2932400620636059e01,  1.2283913169238102e02,   -5.1646910832841897e01,
+      -1.3630368323321786e01,  1.0557789879027116e01,   -7.9007777139483810e00,
+      -7.2682825476758552e01,  1.2245259140017740e02,   -5.1489446559796768e01,
+      -1.2670671078399982e01,  9.0981634949263963e00,   -6.1383490362855788e00,
+      -6.2406844162279825e01,  1.0378677653422224e02,   -4.3402055519687693e01,
+      -1.2619333100308433e01,  9.3364634226935799e00,   -5.8491811509717584e00,
+      -6.5316414528433455e01,  1.0763857666200300e02,   -4.4841832720191050e01,
+      -1.2670671133253135e01,  9.0981627374157021e00,   -6.1383537481895356e00,
+      -6.2406830503476570e01,  1.0378676818216074e02,   -4.3402055529436716e01,
+      -1.2693036794620980e01,  9.0708908225804148e00,   -6.1281713411274001e00,
+      -6.2191660620037396e01,  1.0344456594081470e02,   -4.3260806640248063e01,
+      -1.1732979767504439e01,  7.7377614739662697e00,   -4.6587775146685351e00,
+      -5.2547655563671029e01,  8.6296103981829802e01,   -3.5891515805495345e01,
+      -1.1651721415208119e01,  8.0340005825064456e00,   -4.3852919661646119e00,
+      -5.5898160750405737e01,  9.0851291378134590e01,   -3.7622755083739385e01,
+      -1.1732979994779518e01,  7.7377588120662892e00,   -4.6587914600219875e00,
+      -5.2547607987974565e01,  8.6296066930227624e01,   -3.5891510429190419e01,
+      -1.1758218632638741e01,  7.7137968422318544e00,   -4.6438239588320966e00,
+      -5.2381405657406454e01,  8.6019170302439520e01,   -3.5774653697918737e01,
+      -1.0797063195543267e01,  6.5040766534586290e00,   -3.4402783696562169e00,
+      -4.3393478931462226e01,  7.0370032342568010e01,   -2.9105535302381853e01,
+      -1.0672637254876815e01,  6.8603244928014488e00,   -3.1995767859681346e00,
+      -4.7101348454718874e01,  7.5530774605740319e01,   -3.1094453979913311e01,
+      -1.0797064129672576e01,  6.5040675030570139e00,   -3.4403181344841500e00,
+      -4.3393319126804485e01,  7.0369884883020177e01,   -2.9105501594155889e01,
+      -1.0825134802124644e01,  6.4853446725127366e00,   -3.4195560956016346e00,
+      -4.3296381389022351e01,  7.0187483762520671e01,   -2.9024415860031247e01,
+      -9.8622468030169337e00,  5.3955359781222549e00,   -2.4558741324534137e00,
+      -3.4983728078555984e01,  5.6014425934291204e01,   -2.3035887876475471e01,
+      -9.6769173769353625e00,  5.8079540801032961e00,   -2.2635143148159220e00,
+      -3.8890523502249145e01,  6.1563046720547966e01,   -2.5198820521877391e01,
+      -9.8622505990399034e00,  5.3955054149765509e00,   -2.4559821583353774e00,
+      -3.4983216045684472e01,  5.6013889382190079e01,   -2.3035736114340502e01,
+      -9.8926597117464805e00,  5.3849440641688187e00,   -2.4279562878572039e00,
+      -3.4983707025980287e01,  5.5966629574570753e01,   -2.3006306589550750e01,
+      -8.9277749780883457e00,  4.4108678323349286e00,   -1.6793815271288624e00,
+      -2.7359655656676122e01,  4.3239544183593061e01,   -1.7676416286664047e01,
+      -8.6587749152265552e00,  4.8674392165289442e00,   -1.5450097170494306e00,
+      -3.1230915545542118e01,  4.8829474992442343e01,   -1.9874755288141955e01,
+      -8.9277901202336185e00,  4.4107699183102085e00,   -1.6796551456533098e00,
+      -2.7358123514289456e01,  4.3237769027728554e01,   -1.7675844947587926e01,
+      -8.9590559763951383e00,  4.4128957610428623e00,   -1.6423658138809611e00,
+      -2.7493743583145054e01,  4.3380518846300511e01,   -1.7719639183506050e01,
+      -7.9928164326293913e00,  3.5492331091008302e00,   -1.0852462622393610e00,
+      -2.0565792757352423e01,  3.2061909496398073e01,   -1.3023704651715642e01,
+      -7.6125412569887647e00,  4.0287966748633526e00,   -1.0084592804412351e00,
+      -2.4116992333062022e01,  3.7252797603904497e01,   -1.5077495076198684e01,
+      -7.9928747817255603e00,  3.5489404571097585e00,   -1.0858609980296849e00,
+      -2.0561701094768868e01,  3.2056747083970720e01,   -1.3021877019728107e01,
+      -8.0213899495838241e00,  3.5708128515175943e00,   -1.0368753205735253e00,
+      -2.0877831538201836e01,  3.2456559535389509e01,   -1.3165540198118645e01,
+      -7.0564174984379102e00,  2.8104770395789380e00,   -6.4821407306458223e-01,
+      -1.4652118176169953e01,  2.2507145963021038e01,   -9.0780963613608154e00,
+      -6.5338936679228468e00,  3.2846161494194233e00,   -6.1760141818709846e-01,
+      -1.7606122820367215e01,  2.6855555289500277e01,   -1.0803821410528570e01,
+      -7.0566263531717324e00,  2.8097184139861691e00,   -6.4925197579297411e-01,
+      -1.4643483271177150e01,  2.2495243692983838e01,   -9.0734373052814821e00,
+      -7.0742646195707266e00,  2.8621047467298468e00,   -5.8641470402843421e-01,
+      -1.5178915176777426e01,  2.3211717123277591e01,   -9.3414295847965061e00,
+      -6.1172231064332783e00,  2.1957964102200167e00,   -3.4265643705632465e-01,
+      -9.6769153352706798e00,  1.4613873405033004e01,   -5.8450824172251430e00,
+      -5.4212678780860326e00,  2.6341589573018260e00,   -3.4085224757280796e-01,
+      -1.1835854891340576e01,  1.7794701474942944e01,   -7.1075278532253687e00,
+      -6.1178367984533244e00,  2.1945528943967396e00,   -3.4261268423617658e-01,
+      -9.6695829134679272e00,  1.4600877298870854e01,   -5.8381668136523013e00,
+      -6.1072022151656586e00,  2.2922503774685161e00,   -2.6715334266026142e-01,
+      -1.0408120531614587e01,  1.5617405440391840e01,   -6.2270636615178061e00,
+      -5.1722074807324017e00,  1.7098190643016411e00,   -1.4098618492175408e-01,
+      -5.7061337346696464e00,  8.4331806866534098e00,   -3.3349192888568142e00,
+      -4.2766424379800121e00,  2.0860564217794284e00,   -1.5548660419053545e-01,
+      -7.0034949575065015e00,  1.0332245608764421e01,   -4.0873492185766374e00,
+      -5.1727690165421372e00,  1.7132539127425084e00,   -1.2776576793785877e-01,
+      -5.7565343018918274e00,  8.4941254548170697e00,   -3.3479852132230872e00,
+      -5.0998839330979591e00,  1.8678855512825561e00,   -5.7718910331047868e-02,
+      -6.5095346397755423e00,  9.5462002113817768e00,   -3.7632628689263172e00,
+      -4.2112469382255613e00,  1.3675717927787789e00,   -9.4961575783498800e-03,
+      -2.7877417589321136e00,  3.9953503912711956e00,   -1.5499906707437840e00,
+      -3.1046711877098376e00,  1.6568346830533449e00,   -4.5990009889900242e-02,
+      -3.3140676307068091e00,  4.7472200808709299e00,   -1.8492173878772247e00,
+      -4.1976749320353317e00,  1.4246952243441517e00,   8.7531923058200650e-02,
+      -3.0996975434049761e00,  4.4668738099197531e00,   -1.7103055321708385e00,
+      -4.0163145894665320e00,  1.5923303121893606e00,   5.8249749369824022e-02,
+      -3.3748048713195491e00,  4.7925769874900315e00,   -1.8598420111853879e00,
+      -3.1955533414298376e00,  1.2168024121915868e00,   9.9474205814620603e-02,
+      -8.6811124876189694e-01, 1.1994338853723501e00,   -4.4837238870567747e-01,
+      -1.9098914522594992e00,  1.3654451552507061e00,   2.9537044429980407e-03,
+      -9.3701125207094127e-01, 1.2575365835116745e00,   -4.7248060681970733e-01,
+      -3.0285770502890443e00,  1.6166340190704305e00,   4.8662683065338386e-01,
+      -1.2308607057515726e00,  1.6114560066217587e00,   -6.5896729332189652e-01,
+      -2.8078044229222514e00,  1.4555130910035559e00,   9.0876948497501955e-02,
+      -1.0566809618626720e00,  1.3938154223720176e00,   -5.2279617091852160e-01,
+      -1.9963264755188566e00,  1.3672906754961440e00,   2.0801988470625002e-01,
+      2.0083818728351077e-02,  -1.5135587406137185e-02, -1.4175240342178652e-02,
+      -6.9344786794476854e-01, 1.2280621078720415e00,   1.2333381103148277e-02,
+      -1.0895386066093759e-02, 2.1764282171790141e-02,  -1.0106900291744604e-02,
+      -1.2036881930169383e00,  2.0482931230000392e00,   -1.2689218008973949e-01,
+      -5.0580690719339239e-01, 3.4047786101030464e-01,  -7.0959386937004015e-02,
+      -1.4470760938303664e00,  1.4285049373060201e00,   5.5764887956399375e-02,
+      -2.9461990750009881e-02, 2.3005167601875431e-02,  -1.0760396189439407e-02,
+      -4.3024292433642597e-01, 1.7121633497582587e00,   3.5705413032693957e-02,
+      -9.9216800479772127e-01, 1.5115432403429119e00,   -6.3985596276149748e-01,
+      5.4770961684437192e-01,  1.2565653391084903e00,   9.1639130181564755e-03,
+      -6.8547618650262643e-01, 1.2037212931265591e00,   -5.1526772142324506e-01,
+      4.8142431677326969e-01,  1.2842025505965851e00,   -3.1103960497811806e-01,
+      -3.8667287940463613e-01, 9.2663039525338942e-01,  -4.1330437951972537e-01,
+      1.9976512094478704e-02,  1.4898674304290889e00,   -2.1940405767858565e-03,
+      -8.0791207141984167e-01, 1.3979310081478775e00,   -5.9845265079421794e-01,
+      1.1971451112382212e00,   1.6539633089946477e00,   -2.7009878691796618e-01,
+      -2.8868139196850624e00,  4.7294193613612734e00,   -1.9578020397520424e00,
+      1.8164162541717044e00,   1.4570111710269262e00,   2.2385898037164991e-02,
+      -3.1195681762439769e00,  4.9723722392038878e00,   -2.0423972644796100e00,
+      1.5812403987207633e00,   1.1421043858413655e00,   -4.4319666868952730e-02,
+      -2.3144705949527720e00,  3.7448930479898297e00,   -1.5426803544433196e00,
+      1.4992161878806018e00,   1.6612039136364238e00,   -2.2870713891204597e-02,
+      -3.4442115437939465e00,  5.5057190995408973e00,   -2.2657208348376137e00,
+      2.4658130352390710e00,   1.5819912227884063e00,   -1.3204477532594588e-01,
+      -5.7752803465671017e00,  9.0677018990478242e00,   -3.6843468204828174e00,
+      3.1062201217160963e00,   1.8205810727868250e00,   7.3942159732456811e-02,
+      -7.3418038323250947e00,  1.1309154676354810e01,   -4.5733470083866452e00,
+      2.5667672162869133e00,   1.3762236869878626e00,   5.4823291778512563e-02,
+      -5.5558964069977943e00,  8.5620133672289516e00,   -3.4575259608624478e00,
+      2.9333361085351610e00,   1.9771000784477066e00,   2.1600903596218385e-02,
+      -7.7786452012965430e00,  1.2026327126407146e01,   -4.8722408979121159e00,
+      3.5238342146994350e00,   1.8411341262124141e00,   1.0485737443151430e-01,
+      -1.0316470080846322e01,  1.5628354265192609e01,   -6.2547428286449396e00,
+      4.3947471898784478e00,   2.3129375587624681e00,   1.6998863701958250e-01,
+      -1.3069120913924280e01,  1.9764673064124775e01,   -7.9234176878170990e00,
+      3.5464051944219954e00,   1.7786047141550632e00,   1.8395466553434961e-01,
+      -1.0256713338978345e01,  1.5450540198835597e01,   -6.1709943751208902e00,
+      4.3074781177775723e00,   2.4284702978185178e00,   1.2121907902830774e-01,
+      -1.3510697720561426e01,  2.0490823414440431e01,   -8.2265504110307699e00,
+      4.5269670710447079e00,   2.3411415500822019e00,   3.7814443659878427e-01,
+      -1.6533454371385766e01,  2.4532574055181296e01,   -9.7222898630871342e00,
+      5.6498078480438974e00,   2.8871559084424092e00,   3.1648740182441881e-01,
+      -1.9832336139347099e01,  2.9630584562783888e01,   -1.1804975183138390e01,
+      4.5317970588477650e00,   2.3235629480266455e00,   4.0711209040396701e-01,
+      -1.6523611973754900e01,  2.4482080409856291e01,   -9.6968326211377835e00,
+      5.6107427774726322e00,   2.9693568967987254e00,   2.6856229367890733e-01,
+      -2.0186235796983127e01,  3.0228033555488111e01,   -1.2057362656117963e01,
+      5.5230828784340904e00,   3.0159142144119913e00,   7.5032702265793638e-01,
+      -2.4452361306480910e01,  3.5745746299744695e01,   -1.4059387633540990e01,
+      6.8467243986091164e00,   3.5205846294935204e00,   5.5323452910250115e-01,
+      -2.7424447720726722e01,  4.0542113968978946e01,   -1.6058340606199877e01,
+      5.5241079122419858e00,   3.0111097413061287e00,   7.6043241689918206e-01,
+      -2.4453330947201032e01,  3.5733842835424838e01,   -1.4052622761934279e01,
+      6.8330970703372866e00,   3.5730950345697865e00,   5.0442967447855436e-01,
+      -2.7630302835415993e01,  4.0921397061842079e01,   -1.6223699529825666e01,
+      6.5233214752268127e00,   3.8455313715589599e00,   1.2738445662734672e00,
+      -3.4142511056048967e01,  4.9288751118195229e01,   -1.9258816488331760e01,
+      7.9798691992574877e00,   4.2304633704347614e00,   9.4916911879724064e-01,
+      -3.6082800915305256e01,  5.2740474636382487e01,   -2.0757970588732530e01,
+      6.5235391967368317e00,   3.8442392655293900e00,   1.2772689685023881e00,
+      -3.4144245582802192e01,  4.9286600694030149e01,   -1.9257235266278844e01,
+      7.9780164759860508e00,   4.2581364755189171e00,   9.0490824102641643e-01,
+      -3.6146890048111374e01,  5.2902251888236343e01,   -2.0834714063750525e01,
+      7.5301209868737518e00,   4.8266093670811516e00,   1.9906532239804082e00,
+      -4.5696171225139402e01,  6.5222794336738914e01,   -2.5330008845677121e01,
+      9.0592048208341964e00,   5.0524444639807982e00,   1.5639083038511417e00,
+      -4.6227354827270197e01,  6.6742768625790532e01,   -2.6090733281390481e01,
+      7.5301672757177256e00,   4.8262668988539703e00,   1.9917837214882572e00,
+      -4.5697152262800707e01,  6.5222641787790508e01,   -2.5329699752317662e01,
+      9.0617089689058279e00,   5.0627200474303731e00,   1.5306087886050987e00,
+      -4.6201245261995687e01,  6.6753711704174307e01,   -2.6103836713323240e01,
+      8.5439978438576958e00,   5.9605352581937785e00,   2.9388171122244109e00,
+      -5.9213652478598007e01,  8.3623964589400401e01,   -3.2288651007290504e01,
+      1.0100238105795977e01,   6.0156046860821641e00,   2.4311227628788585e00,
+      -5.8189717323516248e01,  8.2972590004142106e01,   -3.2212869674305303e01,
+      8.5440076687321067e00,   5.9604459430021439e00,   2.9391801366526531e00,
+      -5.9214078468041464e01,  8.3624068891376510e01,   -3.2288610777657510e01,
+      1.0103667533796683e01,   6.0158650887345448e00,   2.4107760944314816e00,
+      -5.8125625048064265e01,  8.2906979417176174e01,   -3.2191629006406409e01,
+      9.5650113177877785e00,   7.2498153679976820e00,   4.1551371399277919e00,
+      -7.4795843598083408e01,  1.0457037732454131e02,   -4.0151433068943419e01,
+      1.1116968561077568e01,   7.1347098863330896e00,   3.5688140741297674e00,
+      -7.2151486218593305e01,  1.0165680693075836e02,   -3.9206269356622016e01,
+      9.5650133940644455e00,   7.2497924894015711e00,   4.1552503042122613e00,
+      -7.4796005009548836e01,  1.0457044971811401e02,   -4.0151435976986221e01,
+      1.1120034079668221e01,   7.1303147700774092e00,   3.5594873892317103e00,
+      -7.2082067018068685e01,  1.0156598726189708e02,   -3.9171834664292227e01,
+      1.0593064483227742e01,   8.6969028070512202e00,   5.6755396034912966e00,
+      -9.2539537763180832e01,  1.2813560149579646e02,   -4.8933613418447223e01,
+      1.2119543877083460e01,   8.4137603187360543e00,   4.9925034366798311e00,
+      -8.8194505075704640e01,  1.2287993196505218e02,   -4.7096724506223822e01,
+      1.0593064919257221e01,   8.6968970567044934e00,   5.6755738143875760e00,
+      -9.2539593640863643e01,  1.2813563331215474e02,   -4.8933618162805772e01,
+      1.2121921818513506e01,   8.4078642204619420e00,   4.9908632634858190e00,
+      -8.8134432374832016e01,  1.2279086550380391e02,   -4.7060844505587738e01,
+      1.1627957207938659e01,   1.0303707615441018e01,   7.5344011042552923e00,
+      -1.1253294830348190e02,  1.5438372244089408e02,   -5.8647453529357783e01,
+      1.3114510015623049e01,   9.8513572940713416e00,   6.7213349376406626e00,
+      -1.0635738219113546e02,  1.4665751311861146e02,   -5.5881528760137869e01,
+      1.1627957298834614e01,   1.0303706197478814e01,   7.5344111366673712e00,
+      -1.1253296638384563e02,  1.5438373415898508e02,   -5.8647455853629580e01,
+      1.3116237925845430e01,   9.8455331102145145e00,   6.7243141059359051e00,
+      -1.0631074264006560e02,  1.4658112805680690e02,   -5.5849452095162235e01,
+      1.2669386535689361e01,   1.2071287030293307e01,   9.7633555455962835e00,
+      -1.3485075345900265e02,  1.8336444946299886e02,   -6.9300787627414508e01,
+      1.4105804414673191e01,   1.1444289269702800e01,   8.7789794745243590e00,
+      -1.2666835962860844e02,  1.7298274034188972e02,   -6.5547771558832267e01,
+      1.2669386554490638e01,   1.2071286687068984e01,   9.7633584027450482e00,
+      -1.3485075900242089e02,  1.8336445335820781e02,   -6.9300788508071975e01,
+      1.4107018463574896e01,   1.1439185153305873e01,   8.7843335749580440e00,
+      -1.2663444344319166e02,  1.7292158897636148e02,   -6.5521162694327174e01,
+      1.3716937488160630e01,   1.3999597459400730e01,   1.2389915672436279e01,
+      -1.5954894249539399e02,  2.1510813446746886e02,   -8.0895567204040049e01,
+      1.5095682313349364e01,   1.3189272906323732e01,   1.1192627051714643e01,
+      -1.4915916817312757e02,  2.0184825850919157e02,   -7.6081293415969839e01,
+      1.3716937492019641e01,   1.3999597377767842e01,   1.2389916464009524e01,
+      -1.5954894412085929e02,  2.1510813567394996e02,   -8.0895567498068928e01,
+      1.5096520030681436e01,   1.3185064407456906e01,   1.1198910160279951e01,
+      -1.4913565617175487e02,  2.0180124290250004e02,   -7.6060129778156622e01,
+      1.4770075388032444e01,   1.6087303167766446e01,   1.5436222950666867e01,
+      -1.8666021493779203e02,  2.4962122089688103e02,   -9.3426463524457304e01,
+      1.6085379191481852e01,   1.5083589447287226e01,   1.3991739427782750e01,
+      -1.7386892459375579e02,  2.3325385095807121e02,   -8.7470099643500802e01,
+      1.4770075388818769e01,   1.6087303148664304e01,   1.5436223164442264e01,
+      -1.8666021539675981e02,  2.4962122125116741e02,   -9.3426463615076329e01,
+      1.6085951551006787e01,   1.5080238931969067e01,   1.3998101278449143e01,
+      -1.7385331837944693e02,  2.3321864790104019e02,   -8.7453697552144448e01,
+      1.5828143941097450e01,   1.8331670220961666e01,   1.8918268274003861e01,
+      -2.1619095210442941e02,  2.8688297635978756e02,   -1.0687973526499771e02,
+      1.7075534787366465e01,   1.7125200136366264e01,   1.7207074959934751e01,
+      -2.0084388544719391e02,  2.6720765911058965e02,   -9.9705133726570395e01,
+      1.5828143941256627e01,   1.8331670216557445e01,   1.8918268330404022e01,
+      -2.1619095222989833e02,  2.8688297645950814e02,   -1.0687973529137253e02,
+      1.7075923730873765e01,   1.7122590193964911e01,   1.7213058024904747e01,
+      -2.0083402645820061e02,  2.6718180837697332e02,   -9.9692640534772679e01,
+      1.6890371426423382e01,   2.0728579569842751e01,   2.2845917469463828e01,
+      -2.4812083435502871e02,  3.2684448823688496e02,   -1.2123263616047282e02,
+      1.8066449820492846e01,   1.9312661524160735e01,   2.0870036016187061e01,
+      -2.3013589616073858e02,  3.0372498377642154e02,   -1.1277999824352135e02,
+      1.6890371426455424e01,   2.0728579568840633e01,   2.2845917484032956e01,
+      -2.4812083438838550e02,  3.2684448826399682e02,   -1.2123263616782057e02,
+      1.8066713333743454e01,   1.9310657703202459e01,   2.0875423564416035e01,
+      -2.3013008228413184e02,  3.0370630494679148e02,   -1.1277060230387309e02,
+      1.7955886187113396e01,   2.3272683588860026e01,   2.7223982220959247e01,
+      -2.8240595076334000e02,  3.6943078590316281e02,   -1.3645364576977221e02,
+      1.9058236733002300e01,   2.1644988962398710e01,   2.5012267757287322e01,
+      -2.6180071928343307e02,  3.4282650121799617e02,   -1.2669036882336400e02,
+      1.7955886187119816e01,   2.3272683588634656e01,   2.7223982224651898e01,
+      -2.8240595077199526e02,  3.6943078591032139e02,   -1.3645364577174797e02,
+      1.9058414960148450e01,   2.1643466247439289e01,   2.5016983354038196e01,
+      -2.6179767020610126e02,  3.4281320617581565e02,   -1.2668337355331974e02,
+      1.9023741366983238e01,   2.5957710504548576e01,   3.2054387652193789e01,
+      -3.1898571318422574e02,  4.1454655650462962e02,   -1.5250373535684176e02,
+      2.0050906563887416e01,   2.4121527381838824e01,   2.9665428981325245e01,
+      -2.9589665055055406e02,  3.8453661583827250e02,   -1.4143340987287985e02,
+      1.9023741366984520e01,   2.5957710504498362e01,   3.2054387653114766e01,
+      -3.1898571318642672e02,  4.1454655650647550e02,   -1.5250373535735841e02,
+      2.0051026978020587e01,   2.4120379273875816e01,   2.9669474257430963e01,
+      -2.9589543070583102e02,  3.8452729731205977e02,   -1.4142824748467820e02,
+      2.0092947487287756e01,   2.8776895490568755e01,   3.7339233558876920e01,
+      -9.8781982607414882e00,  7.0916635282296292e-01,  -1.2340880155534291e-02,
+      2.1044418341890132e01,   2.6741847681518077e01,   3.4861073630499796e01,
+      -9.1700568642165461e00,  6.5220324713443967e-01,  -1.1045071585279443e-02,
+      2.0092947487288011e01,   2.8776895490557653e01,   3.7339233559103448e01,
+      -9.8781982608033179e00,  7.0916635282857932e-01,  -1.2340880155703077e-02,
+      2.1044499630877905e01,   2.6740987496092696e01,   3.4864491165514394e01,
+      -9.1707199731434574e00,  6.5223741134844682e-01,  -1.1045188698410773e-02,
+      2.1162510215379026e01,   3.1723491960797684e01,   4.3084295875067085e01,
+      -4.1033675985379521e00,  -6.6095139594000130e-01, 6.0977735530407223e-02,
+      2.2038706806958309e01,   2.9505670300337073e01,   4.0630600131872811e01,
+      -2.7905442844326718e00,  -8.3885972791335117e-01, 6.8309956404426039e-02,
+      2.1162510215379076e01,   3.1723491960795304e01,   4.3084295875120795e01,
+      -4.1033675985539224e00,  -6.6095139593840913e-01, 6.0977735530354210e-02,
+      2.2038761643178379e01,   2.9505029336592230e01,   4.0633451796171073e01,
+      -2.7913314472201640e00,  -8.3878528163749511e-01, 6.8307595298566767e-02,
+      3.1719012432820758e01,   6.7480322661109355e01,   1.3318978565899991e02,
+      -1.6791944323404795e01,  -1.0181217992701848e00,  1.2989592638281225e-01,
+      3.2009499874031789e01,   6.5013296175889408e01,   1.3669799889514238e02,
+      -1.7009031615065428e01,  -1.0689880784706638e00,  1.3388972346122466e-01,
+      3.1719012432820758e01,   6.7480322661109355e01,   1.3318978565899991e02,
+      -1.6791944323404795e01,  -1.0181217992701848e00,  1.2989592638281225e-01,
+      3.2009500887769519e01,   6.5013269472322307e01,   1.3669829238273672e02,
+      -1.7009116366540379e01,  -1.0689798256828462e00,  1.3388945486998777e-01,
+      4.1931127118492086e01,   1.1600186087954401e02,   3.1751764022286790e02,
+      -4.6438894455748802e01,  -8.7599401950869438e-01, 2.2297105562740663e-01,
+      4.2002297497564768e01,   1.1479764873768737e02,   3.2393143797302810e02,
+      -4.7847299173836262e01,  -7.8150712905299369e-01, 2.2131248436241077e-01,
+      4.1931127118492086e01,   1.1600186087954401e02,   3.1751764022286790e02,
+      -4.6438894455748802e01,  -8.7599401950869438e-01, 2.2297105562740663e-01,
+      4.2002297514594851e01,   1.1479764793294436e02,   3.2393145467669495e02,
+      -4.7847304068128608e01,  -7.8150664807362491e-01, 2.2131246858403722e-01,
+      5.1984670105634827e01,   1.7926303194781252e02,   6.2846495111925287e02,
+      -1.0034649475039414e02,  2.4606292097951082e-01,  3.3256752105517051e-01,
+      5.2000554052128159e01,   1.7883235795593501e02,   6.3273302895025176e02,
+      -1.0138733878813618e02,  3.2804187851642969e-01,  3.3055293107858102e-01,
+      5.1984670105634827e01,   1.7926303194781252e02,   6.2846495111925287e02,
+      -1.0034649475039414e02,  2.4606292097951082e-01,  3.3256752105517051e-01,
+      5.2000554052402805e01,   1.7883235793562420e02,   6.3273302962903426e02,
+      -1.0138733898825184e02,  3.2804189825766372e-01,  3.3055293042886030e-01,
+      6.1996666427075382e01,   2.5724136589119979e02,   1.0913830717468406e03,
+      -1.8317243758181812e02,  2.5193786568880601e00,   4.6277932792022042e-01,
+      6.2000133522892554e01,   2.5710536851489377e02,   1.0934673032018356e03,
+      -1.8370056934287794e02,  2.5630609198690104e00,   4.6162176037505448e-01,
+      6.1996666427075382e01,   2.5724136589119979e02,   1.0913830717468406e03,
+      -1.8317243758181812e02,  2.5193786568880601e00,   4.6277932792022042e-01,
+      6.2000133522896938e01,   2.5710536851442714e02,   1.0934673032246803e03,
+      -1.8370056934963364e02,  2.5630609205366826e00,   4.6162176035304603e-01,
+      7.1999279107664492e01,   3.4965254984584158e02,   1.7356304176273381e03,
+      -3.0063395678020430e02,  6.2079056750108883e00,   6.1505333334154833e-01,
+      7.2000032172982571e01,   3.4961232791697932e02,   1.7365043785874466e03,
+      -3.0086002522613632e02,  6.2270725229979789e00,   6.1452738833821030e-01,
+      7.1999279107664492e01,   3.4965254984584158e02,   1.7356304176273381e03,
+      -3.0063395678020430e02,  6.2079056750108883e00,   6.1505333334154833e-01,
+      7.2000032172982642e01,   3.4961232791696904e02,   1.7365043785881401e03,
+      -3.0086002522634379e02,  6.2270725230187063e00,   6.1452738833751985e-01,
+      8.1999844359310714e01,   4.5636323545227941e02,   2.5918884526432239e03,
+      -4.5885344883307727e02,  1.1616256691917803e01,   7.8948404417119522e-01,
+      8.2000007751936337e01,   4.5635184072744744e02,   2.5922210189842476e03,
+      -4.5894061525528980e02,  1.1623761628208563e01,   7.8927378661620728e-01,
+      8.1999844359310714e01,   4.5636323545227941e02,   2.5918884526432239e03,
+      -4.5885344883307727e02,  1.1616256691917803e01,   7.8948404417119522e-01,
+      8.2000007751936337e01,   4.5635184072744744e02,   2.5922210189842476e03,
+      -4.5894061525528980e02,  1.1623761628208563e01,   7.8927378661620728e-01};
+
+  // Table info parameters
+  std::vector<double> table_info = {
+      -2.1000000000000000e01, 2.1000000000000000e01, 1.0500000000000000e02,
+      1.0000000000000000e00,  1.0000000000000000e01, -1.0000000000000000e00};
+
+  // Environment matrix data (em_x)
+  std::vector<double> em_x = {
+      9.3816147034272368e-01,  -1.6703373029862567e-01, -4.4294526064601734e-02,
+      -2.8798505489184573e-01, -1.6703373029862567e-01, 9.2489218226366088e-01,
+      -2.8928196536572048e-01, -4.7833509099876154e-01, -4.4294526064601734e-02,
+      -2.8928196536572048e-01, 5.7034320185695120e-01,  1.8771147911830000e-01,
+      -2.8798505489184573e-01, -4.7833509099876154e-01, 1.8771147911830000e-01,
+      4.0174654365823070e-01,  8.4370316144902313e-01,  -3.7813146789689916e-02,
+      -3.6989397568296523e-01, -4.0554075086539937e-01, -3.7813146789689916e-02,
+      6.5766402633747112e-01,  -4.2312966361682885e-01, 1.2685067374257861e-01,
+      -3.6989397568296523e-01, -4.2312966361682885e-01, 6.0171576901660107e-01,
+      9.8283160997298613e-02,  -4.0554075086539937e-01, 1.2685067374257861e-01,
+      9.8283160997298613e-02,  2.1324148100625978e-01,  9.7843596341516559e-01,
+      -1.0492833888237871e-01, -1.0538688914576379e-01, -2.0453551592353389e-01,
+      -1.0492833888237871e-01, 7.7943976693565231e-01,  -1.5898500035781410e-01,
+      9.4834209331437741e-02,  -1.0538688914576379e-01, -1.5898500035781410e-01,
+      7.4778071691708869e-01,  -6.1895255142095873e-01, -2.0453551592353389e-01,
+      9.4834209331437741e-02,  -6.1895255142095873e-01, 6.0844713798743799e-01,
+      1.0079020879244640e00,   -2.3855984150631487e-01, -3.4608276043004524e-02,
+      -4.7448768267289088e-01, -2.3855984150631487e-01, 4.9732018171028253e-01,
+      -3.1320787082485729e-01, -1.4528004145602180e-01, -3.4608276043004524e-02,
+      -3.1320787082485729e-01, 4.7696729363954582e-01,  1.1723268074231248e-01,
+      -4.7448768267289088e-01, -1.4528004145602180e-01, 1.1723268074231248e-01,
+      4.0511515406019899e-01};
+
+  // Environment matrix data (em) - same as em_x reshaped to 4x4x4
+  std::vector<double> em = em_x;
+
+  // Expected outputs
+  std::vector<double> expected_xyz_scatter = {
+      -2.079966061311932624e00, -7.692086176232872941e-01,
+      -1.330535805782471437e00, -1.535049115492904015e00,
+      -3.387998402723848113e00, -2.128016321720361592e00,
+      -3.277106691660685023e00, -3.038592571524457941e00,
+      -3.249069620732517372e00, -1.970139560184089200e00,
+      -3.099042807698376478e00, -2.871870824895611740e00,
+      -3.505396485462592526e00, -2.262865452602483174e00,
+      -3.422716794166150045e00, -3.179802052814081126e00,
+      -3.387998402723848113e00, -2.128016321720361592e00,
+      -3.277106691660685023e00, -3.038592571524457941e00,
+      -2.097644546971316970e00, -7.853817517282856153e-01,
+      -1.357716376463428354e00, -1.553790419377535814e00,
+      -3.506551159314416299e00, -2.264198868197136782e00,
+      -3.424124719375644776e00, -3.181191625971385051e00,
+      -3.660169689351369549e00, -2.443224371314374199e00,
+      -3.605953674751581683e00, -3.365824124673929774e00,
+      -3.249069620732517372e00, -1.970139560184089200e00,
+      -3.099042807698376478e00, -2.871870824895611740e00,
+      -3.506551159314416299e00, -2.264198868197136782e00,
+      -3.424124719375644776e00, -3.181191625971385051e00,
+      -2.530401019792652750e00, -1.199448737241302965e00,
+      -2.045857780281324256e00, -2.028209748272746360e00,
+      -2.967997665475758939e00, -1.658223979429474859e00,
+      -2.714263642699213275e00, -2.536766176574209553e00,
+      -3.505396485462592526e00, -2.262865452602483174e00,
+      -3.422716794166150045e00, -3.179802052814081126e00,
+      -3.660169689351369549e00, -2.443224371314374199e00,
+      -3.605953674751581683e00, -3.365824124673929774e00,
+      -2.967997665475758939e00, -1.658223979429474859e00,
+      -2.714263642699213275e00, -2.536766176574209553e00,
+      -2.720389222561622855e00, -1.393795402757894042e00,
+      -2.345287611062552902e00, -2.246069186600956336e00,
+      -2.203412666452015500e00, -8.832922757908097111e-01,
+      -1.522624038797550927e00, -1.666955513084583895e00,
+      -3.241305103472522209e00, -1.961375736752476673e00,
+      -3.088893233422514317e00, -2.862569539842237276e00,
+      -3.574866904199356821e00, -2.343386670122550530e00,
+      -3.506415378108253655e00, -3.263388258370226502e00,
+      -3.603390794940432240e00, -2.376642828344087466e00,
+      -3.540123958621633538e00, -3.297678764698442677e00,
+      -3.241305103472522209e00, -1.961375736752476673e00,
+      -3.088893233422514317e00, -2.862569539842237276e00,
+      -2.429999277303452665e00, -1.100023053295984932e00,
+      -1.884632599239140616e00, -1.915409451705063049e00,
+      -3.617225590985869044e00, -2.392818241566808002e00,
+      -3.556322178869617989e00, -3.314299042117806238e00,
+      -3.041076611338295876e00, -1.738238794499435524e00,
+      -2.817792369079202164e00, -2.623522438271889357e00,
+      -3.574866904199356821e00, -2.343386670122550530e00,
+      -3.506415378108253655e00, -3.263388258370226502e00,
+      -3.617225590985869044e00, -2.392818241566808002e00,
+      -3.556322178869617989e00, -3.314299042117806238e00,
+      -2.494628058196212361e00, -1.163767319481099349e00,
+      -1.988521437257453117e00, -1.987826554800365741e00,
+      -3.075717632972006399e00, -1.776439224176796783e00,
+      -2.866012767430887731e00, -2.664752088723873147e00,
+      -3.603390794940432240e00, -2.376642828344087466e00,
+      -3.540123958621633538e00, -3.297678764698442677e00,
+      -3.041076611338295876e00, -1.738238794499435524e00,
+      -2.817792369079202164e00, -2.623522438271889357e00,
+      -3.075717632972006399e00, -1.776439224176796783e00,
+      -2.866012767430887731e00, -2.664752088723873147e00,
+      -2.937692581645545253e00, -1.625281447224701248e00,
+      -2.670609348993408627e00, -2.500890937682363369e00,
+      -2.025708717938600856e00, -7.199180117105589138e-01,
+      -1.247903242801984502e00, -1.477847643583491077e00,
+      -3.319906298979989234e00, -2.050399877432793616e00,
+      -3.190625595262678527e00, -2.956812988881384996e00,
+      -3.320427161433143759e00, -2.050991936909778435e00,
+      -3.191292681270106346e00, -2.957438103019588649e00,
+      -3.426553578385282073e00, -2.172154331535164129e00,
+      -3.325432621990271009e00, -3.084944527573738160e00,
+      -3.319906298979989234e00, -2.050399877432793616e00,
+      -3.190625595262678527e00, -2.956812988881384996e00,
+      -2.284064566837665566e00, -9.593000707401009297e-01,
+      -1.650522869993226838e00, -1.754438009603198623e00,
+      -3.379465923571113084e00, -2.118266849034519783e00,
+      -3.266349102610927257e00, -3.028338829309447533e00,
+      -3.079911054493713163e00, -1.781075057846572385e00,
+      -2.871812857785094764e00, -2.669747195759931468e00,
+      -3.320427161433143759e00, -2.050991936909778435e00,
+      -3.191292681270106346e00, -2.957438103019588649e00,
+      -3.379465923571113084e00, -2.118266849034519783e00,
+      -3.266349102610927257e00, -3.028338829309447533e00,
+      -2.322820890996963428e00, -9.962581166315247483e-01,
+      -1.712462166775041483e00, -1.796847406967182925e00,
+      -3.773974160346314832e00, -2.578147069133936320e00,
+      -3.733153897787112818e00, -3.501723916740695497e00,
+      -3.426553578385282073e00, -2.172154331535164129e00,
+      -3.325432621990271009e00, -3.084944527573738160e00,
+      -3.079911054493713163e00, -1.781075057846572385e00,
+      -2.871812857785094764e00, -2.669747195759931468e00,
+      -3.773974160346314832e00, -2.578147069133936320e00,
+      -3.733153897787112818e00, -3.501723916740695497e00,
+      -2.486913710376590814e00, -1.156110267723822860e00,
+      -1.976136357370776642e00, -1.979145885578800446e00,
+      -1.985509025147957018e00, -6.837428483499845555e-01,
+      -1.187510572446436763e00, -1.435784454536765731e00,
+      -3.459824255787086411e00, -2.210355755872039030e00,
+      -3.366749809979232122e00, -3.124964584278840274e00,
+      -3.237455343632644311e00, -1.957033195714938278e00,
+      -3.083852092710132720e00, -2.857958563155091891e00,
+      -3.657182553291094873e00, -2.439708494346079171e00,
+      -3.602533119650989324e00, -3.362243802860200859e00,
+      -3.459824255787086411e00, -2.210355755872039030e00,
+      -3.366749809979232122e00, -3.124964584278840274e00,
+      -2.612858501561634128e00, -1.282799717023056063e00,
+      -2.177105827922439119e00, -2.122088127377702360e00,
+      -3.527506999611477845e00, -2.288426034556684474e00,
+      -3.449583412378998126e00, -3.206410310763728511e00,
+      -3.364731487733799042e00, -2.101446556578417724e00,
+      -3.247719091239923639e00, -3.010635929634002395e00,
+      -3.237455343632644311e00, -1.957033195714938278e00,
+      -3.083852092710132720e00, -2.857958563155091891e00,
+      -3.527506999611477845e00, -2.288426034556684474e00,
+      -3.449583412378998126e00, -3.206410310763728511e00,
+      -2.635737441843280848e00, -1.306200303853801703e00,
+      -2.213214111467705525e00, -2.148325634854432398e00,
+      -3.052719273633249131e00, -1.751058696872038523e00,
+      -2.834060115075742292e00, -2.637372572375544344e00,
+      -3.657182553291094873e00, -2.439708494346079171e00,
+      -3.602533119650989324e00, -3.362243802860200859e00,
+      -3.364731487733799042e00, -2.101446556578417724e00,
+      -3.247719091239923639e00, -3.010635929634002395e00,
+      -3.052719273633249131e00, -1.751058696872038523e00,
+      -2.834060115075742292e00, -2.637372572375544344e00,
+      -2.716586630714696859e00, -1.389827110741064020e00,
+      -2.339411933160714607e00, -2.241657223573843893e00};
+
+  // Expected gradients
+  std::vector<double> expected_dy_dem_x = {
+      6.020806901503564568e00, 4.855212102289546472e00, 5.539480671183889093e00,
+      4.079184573583559903e00, 4.855212102289546472e00, 6.002622647924519939e00,
+      4.071866106011439612e00, 3.510130130958049044e00, 5.539480671183889093e00,
+      4.071866106011439612e00, 5.377211408293748107e00, 5.608493562913938746e00,
+      4.079184573583559903e00, 3.510130130958049044e00, 5.608493562913938746e00,
+      5.361908159053506928e00, 5.856733593151207451e00, 5.562527732431956196e00,
+      3.696970497751218332e00, 3.591262868531118535e00, 5.562527732431956196e00,
+      5.486518130714558517e00, 3.554623757729209022e00, 5.671203227676853587e00,
+      3.696970497751218332e00, 3.554623757729209022e00, 5.409085428112309302e00,
+      5.689207994509548172e00, 3.591262868531118535e00, 5.671203227676853587e00,
+      5.689207994509548172e00, 5.576190638038706737e00, 6.061353798127412595e00,
+      5.246377090750031336e00, 5.243735237316045961e00, 4.603911062145699340e00,
+      5.246377090750031336e00, 5.721012903047327569e00, 4.908561477952471108e00,
+      5.690718304708298980e00, 5.243735237316045961e00, 4.908561477952471108e00,
+      5.654457931163397788e00, 3.881456101028720962e00, 4.603911062145699340e00,
+      5.690718304708298980e00, 3.881456101028720962e00, 5.417053162665033561e00,
+      6.074411415516496326e00, 4.379884232572607949e00, 5.573202499423647538e00,
+      3.509692162941947302e00, 4.379884232572607949e00, 5.338347197725587634e00,
+      3.943361243591621879e00, 4.998151625670068654e00, 5.573202499423647538e00,
+      3.943361243591621879e00, 5.336491405093880758e00, 5.678289531805588375e00,
+      3.509692162941947302e00, 4.998151625670068654e00, 5.678289531805588375e00,
+      5.359752988507883842e00};
+
+  // Test dimensions
+  const int nloc = 4;
+  const int nnei_i = 4;
+  const int nnei_j = 4;
+  const int last_layer_size = 4;
+
+  void SetUp() override {}
+  void TearDown() override {}
+};
+
+TEST_F(TestTabulateSeTTebd, tabulate_fusion_se_t_tebd_cpu) {  // CPU forward
+  std::vector<double> xyz_scatter(nloc * nnei_i * nnei_j * last_layer_size, 0);
+  deepmd::tabulate_fusion_se_t_tebd_cpu<double>(
+      &xyz_scatter[0], &table[0], &table_info[0], &em_x[0], &em[0], nloc,
+      nnei_i, nnei_j, last_layer_size);  // result is read from xyz_scatter
+  EXPECT_EQ(xyz_scatter.size(), nloc * nnei_i * nnei_j * last_layer_size);
+  EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
+  for (int jj = 0; jj < xyz_scatter.size(); ++jj) {  // element-wise check
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
+  }  // 1e-5 is an absolute tolerance against the fixture reference
+}
+
+TEST_F(TestTabulateSeTTebd, tabulate_fusion_se_t_tebd_grad_cpu) {
+  std::vector<double> dy_dem_x(em_x.size());  // value-initialised to 0.0
+  std::vector<double> dy(nloc * nnei_i * nnei_j * last_layer_size, 1.0);
+  // CPU backward: dy is all ones; the result is read from dy_dem_x.
+  deepmd::tabulate_fusion_se_t_tebd_grad_cpu<double>(
+      &dy_dem_x[0], &table[0], &table_info[0], &em_x[0], &em[0], &dy[0], nloc,
+      nnei_i, nnei_j, last_layer_size);
+  // One gradient value is expected per em_x entry.
+  EXPECT_EQ(dy_dem_x.size(), nloc * nnei_i * nnei_j);
+  EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size());
+  // Element-wise comparison with an absolute tolerance of 1e-5.
+  for (int jj = 0; jj < dy_dem_x.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5);
+  }
+}
+
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+TEST_F(TestTabulateSeTTebd, tabulate_fusion_se_t_tebd_gpu) {  // GPU forward
+  std::vector<double> xyz_scatter(nloc * nnei_i * nnei_j * last_layer_size, 0);
+  double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL,
+         *em_dev = NULL;
+  // Create device buffers from host vectors (presumably alloc + upload).
+  deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter);
+  deepmd::malloc_device_memory_sync(table_dev, table);
+  deepmd::malloc_device_memory_sync(em_x_dev, em_x);
+  deepmd::malloc_device_memory_sync(em_dev, em);
+  // table_info is passed as a host pointer; all other buffers are *_dev.
+  deepmd::tabulate_fusion_se_t_tebd_gpu<double>(
+      xyz_scatter_dev, table_dev, &table_info[0], em_x_dev, em_dev, nloc,
+      nnei_i, nnei_j, last_layer_size);
+  // Copy the result back into the host vector for checking.
+  deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter);
+  // Release every device buffer before running the assertions.
+  deepmd::delete_device_memory(xyz_scatter_dev);
+  deepmd::delete_device_memory(table_dev);
+  deepmd::delete_device_memory(em_x_dev);
+  deepmd::delete_device_memory(em_dev);
+  // Size checks, then element-wise comparison with absolute tolerance 1e-5.
+  EXPECT_EQ(xyz_scatter.size(), nloc * nnei_i * nnei_j * last_layer_size);
+  EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
+  for (int jj = 0; jj < xyz_scatter.size(); ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
+  }
+}
+
+TEST_F(TestTabulateSeTTebd, tabulate_fusion_se_t_tebd_grad_gpu) {
+  std::vector<double> dy_dem_x(em_x.size(), 0.0);
+  std::vector<double> dy(nloc * nnei_i * nnei_j * last_layer_size, 1.0);
+  // GPU backward: dy is all ones; dy_dem_x starts at 0.0.
+  double *dy_dem_x_dev = NULL, *table_dev = NULL, *em_x_dev = NULL,
+         *em_dev = NULL, *dy_dev = NULL;
+  // Create device buffers from host vectors (presumably alloc + upload).
+  deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x);
+  deepmd::malloc_device_memory_sync(table_dev, table);
+  deepmd::malloc_device_memory_sync(em_x_dev, em_x);
+  deepmd::malloc_device_memory_sync(em_dev, em);
+  deepmd::malloc_device_memory_sync(dy_dev, dy);
+  // table_info is passed as a host pointer; all other buffers are *_dev.
+  deepmd::tabulate_fusion_se_t_tebd_grad_gpu<double>(
+      dy_dem_x_dev, table_dev, &table_info[0], em_x_dev, em_dev, dy_dev, nloc,
+      nnei_i, nnei_j, last_layer_size);
+  // Copy the gradient back into the host vector for checking.
+  deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x);
+  // Release every device buffer before running the assertions.
+  deepmd::delete_device_memory(dy_dem_x_dev);
+  deepmd::delete_device_memory(table_dev);
+  deepmd::delete_device_memory(em_x_dev);
+  deepmd::delete_device_memory(em_dev);
+  deepmd::delete_device_memory(dy_dev);
+  // One gradient value is expected per em_x entry.
+  EXPECT_EQ(dy_dem_x.size(), nloc * nnei_i * nnei_j);
+  EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size());
+  // Element-wise comparison with an absolute tolerance of 1e-5.
+  for (int jj = 0; jj < dy_dem_x.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5);
+  }
+}
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/lmp/plugin/CMakeLists.txt b/source/lmp/plugin/CMakeLists.txt
index a4e7d9e430..8f32af3e3e 100644
--- a/source/lmp/plugin/CMakeLists.txt
+++ b/source/lmp/plugin/CMakeLists.txt
@@ -2,7 +2,7 @@ if(DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)
   message(STATUS "enable LAMMPS plugin mode")
   add_library(lammps_interface INTERFACE)
   if(DEFINED LAMMPS_VERSION)
-    cmake_minimum_required(VERSION 3.14)
+    cmake_minimum_required(VERSION 3.25.2)
     include(FetchContent)
     FetchContent_Declare(
       lammps_download
diff --git a/source/lmp/tests/test_deeptensor.py b/source/lmp/tests/test_deeptensor.py
index 41d1c10ed6..20be3033b8 100644
--- a/source/lmp/tests/test_deeptensor.py
+++ b/source/lmp/tests/test_deeptensor.py
@@ -66,6 +66,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_TENSORFLOW", "1") != "1":
+        pytest.skip(
+            "Skip test because TensorFlow support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     # TODO
     # write_lmp_data(box, coord, type_HO, data_type_map_file)
diff --git a/source/lmp/tests/test_dplr.py b/source/lmp/tests/test_dplr.py
index bf8783f233..dd0c03aabe 100644
--- a/source/lmp/tests/test_dplr.py
+++ b/source/lmp/tests/test_dplr.py
@@ -271,6 +271,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_TENSORFLOW", "1") != "1":
+        pytest.skip(
+            "Skip test because TensorFlow support is not enabled.",
+        )
     write_lmp_data_full(
         box, coord, mol_list, type_OH, charge, data_file, bond_list, mass_list
     )
diff --git a/source/lmp/tests/test_lammps.py b/source/lmp/tests/test_lammps.py
index c24f032cf6..05de1e69fa 100644
--- a/source/lmp/tests/test_lammps.py
+++ b/source/lmp/tests/test_lammps.py
@@ -230,6 +230,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_TENSORFLOW", "1") != "1":
+        pytest.skip(
+            "Skip test because TensorFlow support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
     write_lmp_data(
diff --git a/source/lmp/tests/test_lammps_3types.py b/source/lmp/tests/test_lammps_3types.py
index a99a83b758..9156914dbc 100644
--- a/source/lmp/tests/test_lammps_3types.py
+++ b/source/lmp/tests/test_lammps_3types.py
@@ -253,6 +253,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_TENSORFLOW", "1") != "1":
+        pytest.skip(
+            "Skip test because TensorFlow support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
 
diff --git a/source/lmp/tests/test_lammps_dpa_jax.py b/source/lmp/tests/test_lammps_dpa_jax.py
index 65991b9732..51b2d56742 100644
--- a/source/lmp/tests/test_lammps_dpa_jax.py
+++ b/source/lmp/tests/test_lammps_dpa_jax.py
@@ -228,6 +228,10 @@
 
 
 def setup_module():
+    if os.environ.get("ENABLE_JAX", "1") != "1":
+        pytest.skip(
+            "Skip test because JAX support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
     write_lmp_data(
@@ -341,6 +345,10 @@ def test_pair_deepmd_virial(lammps):
         ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps):
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -367,6 +375,10 @@ def test_pair_deepmd_model_devi(lammps):
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial(lammps):
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -405,6 +417,10 @@ def test_pair_deepmd_model_devi_virial(lammps):
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps):
     relative = 1.0
     lammps.pair_style(
@@ -434,6 +450,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps):
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps):
     relative = 1.0
     lammps.pair_style(
@@ -520,6 +540,10 @@ def test_pair_deepmd_virial_real(lammps_real):
         )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_real(lammps_real):
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -550,6 +574,10 @@ def test_pair_deepmd_model_devi_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial_real(lammps_real):
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -594,6 +622,10 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
     relative = 1.0
     lammps_real.pair_style(
@@ -627,6 +659,10 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real):
     relative = 1.0
     lammps_real.pair_style(
diff --git a/source/lmp/tests/test_lammps_dpa_pt.py b/source/lmp/tests/test_lammps_dpa_pt.py
index 2768332c71..6ba71fe52b 100644
--- a/source/lmp/tests/test_lammps_dpa_pt.py
+++ b/source/lmp/tests/test_lammps_dpa_pt.py
@@ -226,6 +226,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_PYTORCH", "1") != "1":
+        pytest.skip(
+            "Skip test because PyTorch support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
     write_lmp_data(
@@ -337,6 +341,10 @@ def test_pair_deepmd_virial(lammps) -> None:
         ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps) -> None:
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -363,6 +371,10 @@ def test_pair_deepmd_model_devi(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial(lammps) -> None:
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -401,6 +413,10 @@ def test_pair_deepmd_model_devi_virial(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -430,6 +446,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -516,6 +536,10 @@ def test_pair_deepmd_virial_real(lammps_real) -> None:
         )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_real(lammps_real) -> None:
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -546,6 +570,10 @@ def test_pair_deepmd_model_devi_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial_real(lammps_real) -> None:
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -590,6 +618,10 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real) -> None:
     relative = 1.0
     lammps_real.pair_style(
@@ -623,6 +655,10 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real) -> None:
     relative = 1.0
     lammps_real.pair_style(
@@ -682,6 +718,10 @@ def test_pair_deepmd_si(lammps_si) -> None:
     ("balance_args",),
     [(["--balance"],), ([],)],
 )
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_mpi(balance_args: list) -> None:
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
diff --git a/source/lmp/tests/test_lammps_dpa_pt_nopbc.py b/source/lmp/tests/test_lammps_dpa_pt_nopbc.py
index 1c2e145c84..989a782b5f 100644
--- a/source/lmp/tests/test_lammps_dpa_pt_nopbc.py
+++ b/source/lmp/tests/test_lammps_dpa_pt_nopbc.py
@@ -224,6 +224,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_PYTORCH", "1") != "1":
+        pytest.skip(
+            "Skip test because PyTorch support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
     write_lmp_data(
@@ -335,6 +339,10 @@ def test_pair_deepmd_virial(lammps) -> None:
         ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps) -> None:
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -361,6 +369,10 @@ def test_pair_deepmd_model_devi(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial(lammps) -> None:
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -399,6 +411,10 @@ def test_pair_deepmd_model_devi_virial(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -428,6 +444,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -514,6 +534,10 @@ def test_pair_deepmd_virial_real(lammps_real) -> None:
         )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_real(lammps_real) -> None:
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -544,6 +568,10 @@ def test_pair_deepmd_model_devi_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial_real(lammps_real) -> None:
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -588,6 +616,10 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real) -> None:
     relative = 1.0
     lammps_real.pair_style(
@@ -621,6 +653,10 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real) -> None:
     relative = 1.0
     lammps_real.pair_style(
@@ -680,6 +716,10 @@ def test_pair_deepmd_si(lammps_si) -> None:
     ("balance_args",),
     [(["--balance"],), ([],)],
 )
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_mpi(balance_args: list) -> None:
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
diff --git a/source/lmp/tests/test_lammps_dpa_sel_pt.py b/source/lmp/tests/test_lammps_dpa_sel_pt.py
index e758251f18..f65c710409 100644
--- a/source/lmp/tests/test_lammps_dpa_sel_pt.py
+++ b/source/lmp/tests/test_lammps_dpa_sel_pt.py
@@ -229,6 +229,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_PYTORCH", "1") != "1":
+        pytest.skip(
+            "Skip test because PyTorch support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
     write_lmp_data(
@@ -340,6 +344,10 @@ def test_pair_deepmd_virial(lammps) -> None:
         ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps) -> None:
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -366,6 +374,10 @@ def test_pair_deepmd_model_devi(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial(lammps) -> None:
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -404,6 +416,10 @@ def test_pair_deepmd_model_devi_virial(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -433,6 +449,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -519,6 +539,10 @@ def test_pair_deepmd_virial_real(lammps_real) -> None:
         )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_real(lammps_real) -> None:
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -549,6 +573,10 @@ def test_pair_deepmd_model_devi_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial_real(lammps_real) -> None:
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -593,6 +621,10 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real) -> None:
     relative = 1.0
     lammps_real.pair_style(
@@ -626,6 +658,10 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real) -> None:
     relative = 1.0
     lammps_real.pair_style(
diff --git a/source/lmp/tests/test_lammps_faparam.py b/source/lmp/tests/test_lammps_faparam.py
index 4f744119b6..1a614c3d24 100644
--- a/source/lmp/tests/test_lammps_faparam.py
+++ b/source/lmp/tests/test_lammps_faparam.py
@@ -140,6 +140,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_TENSORFLOW", "1") != "1":
+        pytest.skip(
+            "Skip test because TensorFlow support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
 
 
diff --git a/source/lmp/tests/test_lammps_jax.py b/source/lmp/tests/test_lammps_jax.py
index 0c488cd1bc..e3d0e5ce74 100644
--- a/source/lmp/tests/test_lammps_jax.py
+++ b/source/lmp/tests/test_lammps_jax.py
@@ -228,6 +228,10 @@
 
 
 def setup_module():
+    if os.environ.get("ENABLE_JAX", "1") != "1":
+        pytest.skip(
+            "Skip test because JAX support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
     write_lmp_data(
@@ -339,6 +343,10 @@ def test_pair_deepmd_virial(lammps):
         ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps):
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -365,6 +373,10 @@ def test_pair_deepmd_model_devi(lammps):
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial(lammps):
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -403,6 +415,10 @@ def test_pair_deepmd_model_devi_virial(lammps):
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps):
     relative = 1.0
     lammps.pair_style(
@@ -432,6 +448,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps):
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps):
     relative = 1.0
     lammps.pair_style(
@@ -518,6 +538,10 @@ def test_pair_deepmd_virial_real(lammps_real):
         )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_real(lammps_real):
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -548,6 +572,10 @@ def test_pair_deepmd_model_devi_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial_real(lammps_real):
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -592,6 +620,10 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
     relative = 1.0
     lammps_real.pair_style(
@@ -625,6 +657,10 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real):
     relative = 1.0
     lammps_real.pair_style(
@@ -684,6 +720,10 @@ def test_pair_deepmd_si(lammps_si):
     ("balance_args",),
     [(["--balance"],), ([],)],
 )
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_mpi(balance_args: list):
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
diff --git a/source/lmp/tests/test_lammps_pd.py b/source/lmp/tests/test_lammps_pd.py
index 92b00aba29..85275c4027 100644
--- a/source/lmp/tests/test_lammps_pd.py
+++ b/source/lmp/tests/test_lammps_pd.py
@@ -229,6 +229,10 @@
 
 
 def setup_module():
+    if os.environ.get("ENABLE_PADDLE", "1") != "1":
+        pytest.skip(
+            "Skip test because Paddle support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
     write_lmp_data(
@@ -340,6 +344,10 @@ def test_pair_deepmd_virial(lammps):
         ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii], RTOL, ATOL)
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps):
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -366,6 +374,10 @@ def test_pair_deepmd_model_devi(lammps):
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial(lammps):
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -406,6 +418,10 @@ def test_pair_deepmd_model_devi_virial(lammps):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps):
     relative = 1.0
     lammps.pair_style(
@@ -435,6 +451,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps):
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps):
     relative = 1.0
     lammps.pair_style(
@@ -527,6 +547,10 @@ def test_pair_deepmd_virial_real(lammps_real):
         )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_real(lammps_real):
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -557,6 +581,10 @@ def test_pair_deepmd_model_devi_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial_real(lammps_real):
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -621,6 +649,10 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
     relative = 1.0
     lammps_real.pair_style(
@@ -654,6 +686,10 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real):
     relative = 1.0
     lammps_real.pair_style(
@@ -714,6 +750,10 @@ def test_pair_deepmd_si(lammps_si):
     ("balance_args",),
     [(["--balance"],), ([],)],
 )
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_mpi(balance_args: list):
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
diff --git a/source/lmp/tests/test_lammps_pt.py b/source/lmp/tests/test_lammps_pt.py
index f675b2b671..f6fb8f949b 100644
--- a/source/lmp/tests/test_lammps_pt.py
+++ b/source/lmp/tests/test_lammps_pt.py
@@ -226,6 +226,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_PYTORCH", "1") != "1":
+        pytest.skip(
+            "Skip test because PyTorch support is not enabled.",
+        )
     write_lmp_data(box, coord, type_OH, data_file)
     write_lmp_data(box, coord, type_HO, data_type_map_file)
     write_lmp_data(
@@ -337,6 +341,10 @@ def test_pair_deepmd_virial(lammps) -> None:
         ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps) -> None:
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -363,6 +371,10 @@ def test_pair_deepmd_model_devi(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial(lammps) -> None:
     lammps.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -401,6 +413,10 @@ def test_pair_deepmd_model_devi_virial(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -430,6 +446,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -516,6 +536,10 @@ def test_pair_deepmd_virial_real(lammps_real) -> None:
         )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_real(lammps_real) -> None:
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -546,6 +570,10 @@ def test_pair_deepmd_model_devi_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_virial_real(lammps_real) -> None:
     lammps_real.pair_style(
         f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
@@ -590,6 +618,10 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real) -> None:
     relative = 1.0
     lammps_real.pair_style(
@@ -623,6 +655,10 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real) -> None:
     )
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real) -> None:
     relative = 1.0
     lammps_real.pair_style(
@@ -682,6 +718,10 @@ def test_pair_deepmd_si(lammps_si) -> None:
     ("balance_args",),
     [(["--balance"],), ([],)],
 )
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_mpi(balance_args: list) -> None:
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
diff --git a/source/lmp/tests/test_lammps_spin.py b/source/lmp/tests/test_lammps_spin.py
index 9ab7271f5f..4bced5e3f0 100644
--- a/source/lmp/tests/test_lammps_spin.py
+++ b/source/lmp/tests/test_lammps_spin.py
@@ -100,6 +100,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_TENSORFLOW", "1") != "1":
+        pytest.skip(
+            "Skip test because TensorFlow support is not enabled.",
+        )
     write_lmp_data_spin(box, coord, spin, type_NiO, data_file)
 
 
diff --git a/source/lmp/tests/test_lammps_spin_nopbc.py b/source/lmp/tests/test_lammps_spin_nopbc.py
index 3ef9a7a355..0147b3da23 100644
--- a/source/lmp/tests/test_lammps_spin_nopbc.py
+++ b/source/lmp/tests/test_lammps_spin_nopbc.py
@@ -99,6 +99,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_TENSORFLOW", "1") != "1":
+        pytest.skip(
+            "Skip test because TensorFlow support is not enabled.",
+        )
     write_lmp_data_spin(box, coord, spin, type_NiO, data_file)
 
 
diff --git a/source/lmp/tests/test_lammps_spin_nopbc_pt.py b/source/lmp/tests/test_lammps_spin_nopbc_pt.py
index 8a4dc7a7f1..236e94b191 100644
--- a/source/lmp/tests/test_lammps_spin_nopbc_pt.py
+++ b/source/lmp/tests/test_lammps_spin_nopbc_pt.py
@@ -96,6 +96,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_PYTORCH", "1") != "1":
+        pytest.skip(
+            "Skip test because PyTorch support is not enabled.",
+        )
     write_lmp_data_spin(box, coord, spin, type_NiO, data_file)
 
 
@@ -146,6 +150,10 @@ def test_pair_deepmd(lammps) -> None:
     lammps.run(1)
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps) -> None:
     lammps.pair_style(
         f"deepspin {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1"
@@ -169,6 +177,10 @@ def test_pair_deepmd_model_devi(lammps) -> None:
     assert md[9] == pytest.approx(np.mean(expected_md_fm))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -207,6 +219,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     ("balance_args",),
     [(["--balance"],), ([],)],
 )
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_mpi(balance_args: list) -> None:
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
diff --git a/source/lmp/tests/test_lammps_spin_pt.py b/source/lmp/tests/test_lammps_spin_pt.py
index 9a0771d047..fd46cb6514 100644
--- a/source/lmp/tests/test_lammps_spin_pt.py
+++ b/source/lmp/tests/test_lammps_spin_pt.py
@@ -96,6 +96,10 @@
 
 
 def setup_module() -> None:
+    if os.environ.get("ENABLE_PYTORCH", "1") != "1":
+        pytest.skip(
+            "Skip test because PyTorch support is not enabled.",
+        )
     write_lmp_data_spin(box, coord, spin, type_NiO, data_file)
 
 
@@ -186,6 +190,10 @@ def test_pair_deepmd_virial(lammps) -> None:
     #     ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi(lammps) -> None:
     lammps.pair_style(
         f"deepspin {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1"
@@ -209,6 +217,10 @@ def test_pair_deepmd_model_devi(lammps) -> None:
     assert md[9] == pytest.approx(np.mean(expected_md_fm))
 
 
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     relative = 1.0
     lammps.pair_style(
@@ -247,6 +259,10 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps) -> None:
     ("balance_args",),
     [(["--balance"],), ([],)],
 )
+@pytest.mark.skipif(
+    os.environ.get("ENABLE_TENSORFLOW", "1") != "1",
+    reason="Skip test because TensorFlow support is not enabled.",
+)
 def test_pair_deepmd_mpi(balance_args: list) -> None:
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
diff --git a/source/op/pt/tabulate_multi_device.cc b/source/op/pt/tabulate_multi_device.cc
index feae37af81..530e9ddf4d 100644
--- a/source/op/pt/tabulate_multi_device.cc
+++ b/source/op/pt/tabulate_multi_device.cc
@@ -335,6 +335,150 @@ void TabulateFusionSeTGradGradForward(const torch::Tensor& table_tensor,
   }
 }
 
+template <typename FPTYPE>
+void TabulateFusionSeTTebdForward(const torch::Tensor& table_tensor,
+                                  const torch::Tensor& table_info_tensor,
+                                  const torch::Tensor& em_x_tensor,
+                                  const torch::Tensor& em_tensor,
+                                  int64_t last_layer_size,
+                                  torch::Tensor& descriptor_tensor) {
+  // validate input ranks: table and em_x must be 2-D, em must be 3-D
+  if (table_tensor.dim() != 2) {
+    throw std::invalid_argument("Dim of table should be 2");
+  }
+  if (em_x_tensor.dim() != 2) {
+    throw std::invalid_argument("Dim of em_x should be 2");
+  }
+  if (em_tensor.dim() != 3) {
+    throw std::invalid_argument("Dim of em should be 3");
+  }
+  // the device reported for table_tensor selects the kernel used below
+  std::string device;
+  GetTensorDevice(table_tensor, device);
+  // flatten every tensor to 1-D and take raw FPTYPE pointers
+  FPTYPE* descriptor = descriptor_tensor.view({-1}).data_ptr<FPTYPE>();
+
+  const FPTYPE* table = table_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* table_info = table_info_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* em_x = em_x_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* em = em_tensor.view({-1}).data_ptr<FPTYPE>();
+
+  const int64_t nloc = em_tensor.size(0);
+  const int64_t nnei_i = em_tensor.size(1);
+  const int64_t nnei_j = em_tensor.size(2);
+  // run the kernel matching the device; other device strings do nothing
+  if (device == "GPU") {
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+    deepmd::tabulate_fusion_se_t_tebd_gpu(descriptor, table, table_info, em_x,
+                                          em, nloc, nnei_i, nnei_j,
+                                          last_layer_size);
+#else
+    throw std::runtime_error(
+        "The input tensor is on the GPU, but the GPU support for the "
+        "customized OP library is not enabled.");
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  } else if (device == "CPU") {
+    deepmd::tabulate_fusion_se_t_tebd_cpu(descriptor, table, table_info, em_x,
+                                          em, nloc, nnei_i, nnei_j,
+                                          last_layer_size);
+  }
+}
+
+template <typename FPTYPE>
+void TabulateFusionSeTTebdGradForward(const torch::Tensor& table_tensor,
+                                      const torch::Tensor& table_info_tensor,
+                                      const torch::Tensor& em_x_tensor,
+                                      const torch::Tensor& em_tensor,
+                                      const torch::Tensor& dy_tensor,
+                                      const torch::Tensor& descriptor_tensor,
+                                      torch::Tensor& dy_dem_x_tensor) {
+  // validate input rank: dy must be 4-D
+  if (dy_tensor.dim() != 4) {
+    throw std::invalid_argument("Dim of dy_tensor should be 4");
+  }
+  std::string device;
+  GetTensorDevice(table_tensor, device);
+  // flatten every tensor to 1-D and take raw FPTYPE pointers
+  FPTYPE* dy_dem_x = dy_dem_x_tensor.view({-1}).data_ptr<FPTYPE>();
+
+  const FPTYPE* table = table_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* table_info = table_info_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* em_x = em_x_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* em = em_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* dy = dy_tensor.view({-1}).data_ptr<FPTYPE>();
+
+  const int64_t nloc = em_tensor.size(0);
+  const int64_t nnei_i = em_tensor.size(1);
+  const int64_t nnei_j = em_tensor.size(2);
+  const int64_t last_layer_size = descriptor_tensor.size(3);
+
+  // run the kernel matching the device; other device strings do nothing
+  if (device == "GPU") {
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+    deepmd::tabulate_fusion_se_t_tebd_grad_gpu(dy_dem_x, table, table_info,
+                                               em_x, em, dy, nloc, nnei_i,
+                                               nnei_j, last_layer_size);
+#else
+    throw std::runtime_error(
+        "The input tensor is on the GPU, but the GPU support for the "
+        "customized OP library is not enabled.");
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  } else if (device == "CPU") {
+    deepmd::tabulate_fusion_se_t_tebd_grad_cpu(dy_dem_x, table, table_info,
+                                               em_x, em, dy, nloc, nnei_i,
+                                               nnei_j, last_layer_size);
+  }
+}
+
+template <typename FPTYPE>
+void TabulateFusionSeTTebdGradGradForward(
+    const torch::Tensor& table_tensor,
+    const torch::Tensor& table_info_tensor,
+    const torch::Tensor& em_x_tensor,
+    const torch::Tensor& em_tensor,
+    const torch::Tensor& dz_dy_dem_x_tensor,
+    const torch::Tensor& descriptor_tensor,
+    torch::Tensor& dz_dy_tensor) {
+  // validate input rank: dz_dy_dem_x must be 3-D
+  if (dz_dy_dem_x_tensor.dim() != 3) {
+    throw std::invalid_argument("Dim of dz_dy_dem_x should be 3");
+  }
+  // the device reported for table_tensor selects the kernel used below
+  std::string device;
+  GetTensorDevice(table_tensor, device);
+  // flatten every tensor to 1-D and take raw FPTYPE pointers
+  FPTYPE* dz_dy = dz_dy_tensor.view({-1}).data_ptr<FPTYPE>();
+
+  const FPTYPE* table = table_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* table_info = table_info_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* em_x = em_x_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* em = em_tensor.view({-1}).data_ptr<FPTYPE>();
+  const FPTYPE* dz_dy_dem_x = dz_dy_dem_x_tensor.view({-1}).data_ptr<FPTYPE>();
+  const int64_t nloc = em_tensor.size(0);
+  const int64_t nnei_i = em_tensor.size(1);
+  const int64_t nnei_j = em_tensor.size(2);
+  const int64_t last_layer_size = descriptor_tensor.size(3);
+  // compute; the GPU size limit is checked before the kernel is launched
+  if (device == "GPU") {
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+    TORCH_CHECK(last_layer_size <= 1024,
+                "In the process of model compression, the size of the "
+                "last layer of embedding net must be less than 1024!");
+    deepmd::tabulate_fusion_se_t_tebd_grad_grad_gpu(
+        dz_dy, table, table_info, em_x, em, dz_dy_dem_x, nloc, nnei_i, nnei_j,
+        last_layer_size);
+#else
+    throw std::runtime_error(
+        "The input tensor is on the GPU, but the GPU support for the "
+        "customized OP library is not enabled.");
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  } else if (device == "CPU") {
+    deepmd::tabulate_fusion_se_t_tebd_grad_grad_cpu(
+        dz_dy, table, table_info, em_x, em, dz_dy_dem_x, nloc, nnei_i, nnei_j,
+        last_layer_size);
+  }
+}
+
 template <typename FPTYPE>
 void TabulateFusionSeRForward(const torch::Tensor& table_tensor,
                               const torch::Tensor& table_info_tensor,
@@ -939,6 +1083,90 @@ class TabulateFusionSeROp
   }
 };
 
+// Autograd wrapper around TabulateFusionSeTTebdForward (forward) and
+// TabulateFusionSeTTebdGradForward (backward); only em_x receives a gradient.
+class TabulateFusionSeTTebdOp
+    : public torch::autograd::Function<TabulateFusionSeTTebdOp> {
+ public:
+  static std::vector<torch::Tensor> forward(
+      torch::autograd::AutogradContext* ctx,
+      const torch::Tensor& table_tensor,
+      const torch::Tensor& table_info_tensor,
+      const torch::Tensor& em_x_tensor,
+      const torch::Tensor& em_tensor,
+      int64_t last_layer_size) {
+    // Dispatch on the table dtype; any non-double table takes the float path.
+    if (table_tensor.dtype() == torch::kDouble) {
+      return forward_t<double>(ctx, table_tensor, table_info_tensor,
+                               em_x_tensor, em_tensor, last_layer_size);
+    }
+    return forward_t<float>(ctx, table_tensor, table_info_tensor, em_x_tensor,
+                            em_tensor, last_layer_size);
+  }
+
+  template <typename FPTYPE>
+  static torch::autograd::variable_list forward_t(
+      torch::autograd::AutogradContext* ctx,
+      const torch::Tensor& table_tensor,
+      const torch::Tensor& table_info_tensor,
+      const torch::Tensor& em_x_tensor,
+      const torch::Tensor& em_tensor,
+      int64_t last_layer_size) {
+    // output: em's first three sizes x last_layer_size; dtype/device of table
+    auto options = torch::TensorOptions()
+                       .dtype(table_tensor.dtype())
+                       .device(table_tensor.device());
+    torch::Tensor descriptor_tensor =
+        torch::empty({em_tensor.size(0), em_tensor.size(1), em_tensor.size(2),
+                      last_layer_size},
+                     options);
+    // compute; the uninitialised descriptor_tensor is handed over to be filled
+    TabulateFusionSeTTebdForward<FPTYPE>(table_tensor, table_info_tensor,
+                                         em_x_tensor, em_tensor,
+                                         last_layer_size, descriptor_tensor);
+    // keep the inputs and the output for backward_t, which reads them by index
+    ctx->save_for_backward({table_tensor, table_info_tensor, em_x_tensor,
+                            em_tensor, descriptor_tensor});
+    return {descriptor_tensor};
+  }
+
+  static torch::autograd::variable_list backward(
+      torch::autograd::AutogradContext* ctx,
+      torch::autograd::variable_list grad_output) {
+    // Dispatch on the dtype of the saved table (saved variable 0), matching
+    // the double/float choice made in forward().
+    const torch::Tensor table_tensor = ctx->get_saved_variables()[0];
+    if (table_tensor.dtype() == torch::kDouble) {
+      return backward_t<double>(ctx, grad_output);
+    }
+    return backward_t<float>(ctx, grad_output);
+  }
+
+  template <typename FPTYPE>
+  static torch::autograd::variable_list backward_t(
+      torch::autograd::AutogradContext* ctx,
+      torch::autograd::variable_list grad_output) {
+    // load data in the order forward_t saved it
+    torch::autograd::variable_list saved_variables = ctx->get_saved_variables();
+    torch::Tensor table_tensor = saved_variables[0];
+    torch::Tensor table_info_tensor = saved_variables[1];
+    torch::Tensor em_x_tensor = saved_variables[2];
+    torch::Tensor em_tensor = saved_variables[3];
+    torch::Tensor descriptor_tensor = saved_variables[4];
+
+    torch::Tensor dy_tensor = grad_output[0].contiguous();
+    // gradient buffer for em_x: zero-initialised, same shape/dtype as em_x
+    torch::Tensor dy_dem_x_tensor = torch::zeros_like(em_x_tensor);
+    // compute the em_x gradient from the incoming gradient dy_tensor
+    TabulateFusionSeTTebdGradForward<FPTYPE>(
+        table_tensor, table_info_tensor, em_x_tensor, em_tensor, dy_tensor,
+        descriptor_tensor, dy_dem_x_tensor);
+
+    return {at::Tensor(), at::Tensor(), dy_dem_x_tensor, at::Tensor(),
+            at::Tensor()};  // one slot per forward() input; only em_x is set
+  }
+};
+
 std::vector<torch::Tensor> tabulate_fusion_se_a(
     const torch::Tensor& table_tensor,
     const torch::Tensor& table_info_tensor,  // only cpu
@@ -972,6 +1200,16 @@ std::vector<torch::Tensor> tabulate_fusion_se_t(
                                     em_x_tensor, em_tensor, last_layer_size);
 }
 
+std::vector<torch::Tensor> tabulate_fusion_se_t_tebd(
+    const torch::Tensor& table_tensor,
+    const torch::Tensor& table_info_tensor,  // only cpu
+    const torch::Tensor& em_x_tensor,
+    const torch::Tensor& em_tensor,
+    int64_t last_layer_size) {
+  return TabulateFusionSeTTebdOp::apply(
+      table_tensor, table_info_tensor, em_x_tensor, em_tensor, last_layer_size);
+}
+
 std::vector<torch::Tensor> tabulate_fusion_se_r(
     const torch::Tensor& table_tensor,
     const torch::Tensor& table_info_tensor,  // only cpu
@@ -990,6 +1228,9 @@ TORCH_LIBRARY_FRAGMENT(deepmd, m) {
 TORCH_LIBRARY_FRAGMENT(deepmd, m) {
   m.def("tabulate_fusion_se_t", tabulate_fusion_se_t);
 }
+TORCH_LIBRARY_FRAGMENT(deepmd, m) {
+  m.def("tabulate_fusion_se_t_tebd", tabulate_fusion_se_t_tebd);
+}
 TORCH_LIBRARY_FRAGMENT(deepmd, m) {
   m.def("tabulate_fusion_se_r", tabulate_fusion_se_r);
 }
diff --git a/source/op/tf/unaggregated_grad.cc b/source/op/tf/unaggregated_grad.cc
index cf645f6c21..329e25b2d2 100644
--- a/source/op/tf/unaggregated_grad.cc
+++ b/source/op/tf/unaggregated_grad.cc
@@ -75,6 +75,10 @@ FPTYPE grad(const FPTYPE xbar,
     case 6: {
       return y * (1 - y);
     }
+    case 7: {
+      const FPTYPE sig = 1.0 / (1.0 + exp(-xbar));
+      return sig + xbar * sig * (1 - sig);
+    }
     default:
       return -1;
   }
@@ -105,6 +109,11 @@ FPTYPE grad_grad(const FPTYPE xbar, const FPTYPE y, const int functype) {
     case 6: {
       return y * (1 - y) * (1 - 2 * y);
     }
+    case 7: {
+      const FPTYPE sig = 1.0 / (1.0 + exp(-xbar));
+      const FPTYPE d_sig = sig * (1 - sig);
+      return 2 * d_sig + xbar * d_sig * (1 - 2 * sig);
+    }
     default:
       return -1;
   }
diff --git a/source/tests/__init__.py b/source/tests/__init__.py
index 6ceb116d85..16149c2cd0 100644
--- a/source/tests/__init__.py
+++ b/source/tests/__init__.py
@@ -1 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+
+import os
+
+if os.environ.get("DP_CI_IMPORT_PADDLE_BEFORE_TF", "0") == "1":
+    import paddle  # noqa: F401
+    import tensorflow  # noqa: F401
diff --git a/source/tests/common/dpmodel/test_fitting_stat.py b/source/tests/common/dpmodel/test_fitting_stat.py
new file mode 100644
index 0000000000..101d2a9ad7
--- /dev/null
+++ b/source/tests/common/dpmodel/test_fitting_stat.py
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor import (
+    DescrptSeA,
+)
+from deepmd.dpmodel.fitting import (
+    EnergyFittingNet,
+)
+
+
+def _make_fake_data_pt(sys_natoms, sys_nframes, avgs, stds):
+    """Build one {"fparam", "aparam"} dict of Gaussian samples per system.
+
+    "fparam" is (nframes, 1, ndof) and "aparam" is (nframes, natoms, ndof), with
+    ndof = len(avgs); each (system, dof) draw uses its own fixed seed, so the
+    data is reproducible.
+    """
+    fake_systems = []
+    for isys, natoms in enumerate(sys_natoms):
+        nframes = sys_nframes[isys]
+        per_dof_f, per_dof_a = [], []
+        for idof, avg in enumerate(avgs):
+            rng_f = np.random.default_rng(2025 * isys + 220 * idof)
+            rng_a = np.random.default_rng(220 * isys + 1636 * idof)
+            std = stds[idof]
+            per_dof_f.append(rng_f.normal(loc=avg, scale=std, size=(nframes, 1)))
+            per_dof_a.append(rng_a.normal(loc=avg, scale=std, size=(nframes, natoms)))
+        fake_systems.append(
+            {
+                # per-dof arrays are stacked first, then the dof axis is moved last
+                "fparam": np.transpose(per_dof_f, (1, 2, 0)),
+                "aparam": np.transpose(per_dof_a, (1, 2, 0)),
+            }
+        )
+    return fake_systems
+
+
+def _brute_fparam_pt(data, ndim):
+    """Return (mean, std) over axis 0 of every system's "fparam" rows.
+
+    Each system's array is reshaped to (-1, ndim) and all rows are pooled
+    together before the statistics are taken.
+    """
+    rows = [np.reshape(sys_dict["fparam"], [-1, ndim]) for sys_dict in data]
+    # no systems: reduce an empty list instead of concatenating nothing
+    pooled = np.concatenate(rows, axis=0) if rows else []
+    avg = np.average(pooled, axis=0)
+    std = np.std(pooled, axis=0)
+    return avg, std
+
+
+def _brute_aparam_pt(data, ndim):
+    """Return (mean, std) over axis 0 of every system's "aparam" rows.
+
+    Each system's array is reshaped to (-1, ndim) and all rows are pooled
+    together before the statistics are taken.
+    """
+    rows = [np.reshape(sys_dict["aparam"], [-1, ndim]) for sys_dict in data]
+    # no systems: reduce an empty list instead of concatenating nothing
+    pooled = np.concatenate(rows, axis=0) if rows else []
+    avg = np.average(pooled, axis=0)
+    std = np.std(pooled, axis=0)
+    return avg, std
+
+
+class TestEnerFittingStat(unittest.TestCase):
+    """Check fparam/aparam statistics computed by the dpmodel energy fitting."""
+
+    def test(self) -> None:
+        avgs = [0, 10, 100]
+        stds = [2, 0.4, 0.00001]
+        ndof = len(avgs)
+        all_data = _make_fake_data_pt([10, 100], [5, 2], avgs, stds)
+        descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
+        fitting = EnergyFittingNet(
+            descrpt.get_ntypes(),
+            descrpt.get_dim_out(),
+            neuron=[240, 240, 240],
+            resnet_dt=True,
+            numb_fparam=3,
+            numb_aparam=3,
+        )
+        # brute-force references; inverse std capped at 100 (presumably 1/protection)
+        frefa, frefs = _brute_fparam_pt(all_data, ndof)
+        arefa, arefs = _brute_aparam_pt(all_data, ndof)
+        fitting.compute_input_stats(all_data, protection=1e-2)
+        frefs_inv = np.minimum(1.0 / frefs, 100)
+        arefs_inv = np.minimum(1.0 / arefs, 100)
+        np.testing.assert_almost_equal(frefa, fitting.fparam_avg)
+        np.testing.assert_almost_equal(frefs_inv, fitting.fparam_inv_std)
+        np.testing.assert_almost_equal(arefa, fitting.aparam_avg)
+        np.testing.assert_almost_equal(arefs_inv, fitting.aparam_inv_std)
diff --git a/source/tests/common/test_deepmd_data.py b/source/tests/common/test_deepmd_data.py
new file mode 100644
index 0000000000..c7a26491cd
--- /dev/null
+++ b/source/tests/common/test_deepmd_data.py
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import tempfile
+import unittest
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+
+from deepmd.utils.data import (
+    DeepmdData,
+)
+
+
+class TestDeepmdDataTypeMap(unittest.TestCase):
+    """Type remapping in DeepmdData when type_map lists an element no atom uses."""
+
+    def setUp(self) -> None:
+        # minimal system on disk: type.raw, type_map.raw, set.000/{coord,box}.npy
+        self.tmpdir = tempfile.TemporaryDirectory()
+        self.root = Path(self.tmpdir.name)
+        self.set_dir = self.root / "set.000"
+        self.set_dir.mkdir()
+
+        atom_types = np.array([0, 1, 0, 1], dtype=np.int32)
+        stored_type_map = np.array(["O", "H", "Si"], dtype=object)
+        np.savetxt(self.root / "type.raw", atom_types, fmt="%d")
+        np.savetxt(self.root / "type_map.raw", stored_type_map, fmt="%s")
+
+        natoms = atom_types.size
+        np.save(self.set_dir / "coord.npy", np.zeros((1, natoms * 3), dtype=np.float32))
+        np.save(self.set_dir / "box.npy", np.eye(3, dtype=np.float32).reshape(1, 9))
+
+    def tearDown(self) -> None:
+        self.tmpdir.cleanup()
+
+    def test_remap_with_unused_types(self) -> None:
+        requested_type_map = ["H", "O", "Si"]
+        data = DeepmdData(str(self.root), type_map=requested_type_map)
+        # "O" and "H" swap places in the requested map, so 0 and 1 are exchanged
+        expected_atom_types = np.array([1, 0, 1, 0], dtype=np.int32)
+        np.testing.assert_array_equal(data.atom_type, expected_atom_types)
+        self.assertEqual(data.type_map, ["H", "O", "Si"])
+
+        # types returned by _load_set are compared in data.idx_map order
+        loaded = data._load_set(self.set_dir)
+        expected_sorted = expected_atom_types[data.idx_map]
+        np.testing.assert_array_equal(loaded["type"], np.tile(expected_sorted, (1, 1)))
diff --git a/source/tests/pd/conftest.py b/source/tests/pd/conftest.py
index 530cb18907..6320d59e7c 100644
--- a/source/tests/pd/conftest.py
+++ b/source/tests/pd/conftest.py
@@ -6,4 +6,5 @@
 @pytest.fixture(scope="package", autouse=True)
 def clear_cuda_memory(request):
     yield
-    paddle.device.cuda.empty_cache()
+    if paddle.device.get_device() != "cpu":
+        paddle.device.empty_cache()
diff --git a/source/tests/pd/test_multitask.py b/source/tests/pd/test_multitask.py
index 0b85816f7b..f345630429 100644
--- a/source/tests/pd/test_multitask.py
+++ b/source/tests/pd/test_multitask.py
@@ -11,6 +11,7 @@
 )
 
 import numpy as np
+import paddle
 
 from deepmd.pd.entrypoints.main import (
     get_trainer,
@@ -232,8 +233,15 @@ def setUp(self) -> None:
         self.config["model"], self.shared_links = preprocess_shared_params(
             self.config["model"]
         )
+        if not paddle.device.is_compiled_with_cuda():
+            self.FLAGS_use_stride_kernel = paddle.get_flags("FLAGS_use_stride_kernel")[
+                "FLAGS_use_stride_kernel"
+            ]
+            paddle.set_flags({"FLAGS_use_stride_kernel": False})
 
     def tearDown(self) -> None:
+        if not paddle.device.is_compiled_with_cuda():
+            paddle.set_flags({"FLAGS_use_stride_kernel": self.FLAGS_use_stride_kernel})
         MultiTaskTrainTest.tearDown(self)
 
 
@@ -271,9 +279,17 @@ def setUp(self) -> None:
         self.config["model"], self.shared_links = preprocess_shared_params(
             self.config["model"]
         )
+        self.config["learning_rate"]["start_lr"] = 1e-5
         self.share_fitting = True
+        if not paddle.device.is_compiled_with_cuda():
+            self.FLAGS_use_stride_kernel = paddle.get_flags("FLAGS_use_stride_kernel")[
+                "FLAGS_use_stride_kernel"
+            ]
+            paddle.set_flags({"FLAGS_use_stride_kernel": False})
 
     def tearDown(self) -> None:
+        if not paddle.device.is_compiled_with_cuda():
+            paddle.set_flags({"FLAGS_use_stride_kernel": self.FLAGS_use_stride_kernel})
         MultiTaskTrainTest.tearDown(self)
 
 
diff --git a/source/tests/pd/test_training.py b/source/tests/pd/test_training.py
index 0dc36fa314..f3d7860881 100644
--- a/source/tests/pd/test_training.py
+++ b/source/tests/pd/test_training.py
@@ -89,7 +89,11 @@ def test_dp_train(self) -> None:
                     state_dict_trained[state_key].numpy(),
                     state_dict_finetuned_empty[state_key].numpy(),
                 )
-                if "fitting_net" not in state_key:
+                if (
+                    ("fitting_net" not in state_key)
+                    or ("fparam" in state_key)
+                    or ("aparam" in state_key)
+                ):
                     np.testing.assert_allclose(
                         state_dict_trained[state_key].numpy(),
                         state_dict_finetuned_random[state_key].numpy(),
@@ -190,6 +194,7 @@ def setUp(self) -> None:
         self.config["training"]["save_freq"] = 1
         self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000"
         shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
+        self.config["model"]["data_stat_nbatch"] = 100
 
     def tearDown(self) -> None:
         (self.set_path / "fparam.npy").unlink(missing_ok=True)
diff --git a/source/tests/pt/model/test_descriptor_dpa1.py b/source/tests/pt/model/test_descriptor_dpa1.py
index abf5d1af01..27b84879dc 100644
--- a/source/tests/pt/model/test_descriptor_dpa1.py
+++ b/source/tests/pt/model/test_descriptor_dpa1.py
@@ -249,6 +249,7 @@ def test_descriptor_block(self) -> None:
         # this is an old state dict, modify manually
         state_dict["compress_info.0"] = des.compress_info[0]
         state_dict["compress_data.0"] = des.compress_data[0]
+        state_dict["type_embd_data"] = des.type_embd_data
         des.load_state_dict(state_dict)
         coord = self.coord
         atype = self.atype
@@ -377,5 +378,6 @@ def translate_se_atten_and_type_embd_dicts_to_dpa1(
         target_dict[tk] = type_embd_dict[kk]
     record[all_keys.index("se_atten.compress_data.0")] = True
     record[all_keys.index("se_atten.compress_info.0")] = True
+    record[all_keys.index("se_atten.type_embd_data")] = True
     assert all(record)
     return target_dict
diff --git a/source/tests/pt/model/test_descriptor_dpa2.py b/source/tests/pt/model/test_descriptor_dpa2.py
index 6a859a497a..3fa6b86636 100644
--- a/source/tests/pt/model/test_descriptor_dpa2.py
+++ b/source/tests/pt/model/test_descriptor_dpa2.py
@@ -196,5 +196,6 @@ def translate_type_embd_dicts_to_dpa2(
         target_dict[tk] = type_embd_dict[kk]
     record[all_keys.index("repinit.compress_data.0")] = True
     record[all_keys.index("repinit.compress_info.0")] = True
+    record[all_keys.index("repinit.type_embd_data")] = True
     assert all(record)
     return target_dict
diff --git a/source/tests/pt/model/water/data/data_1/set.000/box.npy b/source/tests/pt/model/water/data/data_1/set.000/box.npy
new file mode 100644
index 0000000000..6ad2de625b
Binary files /dev/null and b/source/tests/pt/model/water/data/data_1/set.000/box.npy differ
diff --git a/source/tests/pt/model/water/data/data_1/set.000/coord.npy b/source/tests/pt/model/water/data/data_1/set.000/coord.npy
new file mode 100644
index 0000000000..8bd448b125
Binary files /dev/null and b/source/tests/pt/model/water/data/data_1/set.000/coord.npy differ
diff --git a/source/tests/pt/model/water/data/data_1/set.000/energy.npy b/source/tests/pt/model/water/data/data_1/set.000/energy.npy
new file mode 100644
index 0000000000..d03db103f5
Binary files /dev/null and b/source/tests/pt/model/water/data/data_1/set.000/energy.npy differ
diff --git a/source/tests/pt/model/water/data/data_1/set.000/force.npy b/source/tests/pt/model/water/data/data_1/set.000/force.npy
new file mode 100644
index 0000000000..10b2ab83a2
Binary files /dev/null and b/source/tests/pt/model/water/data/data_1/set.000/force.npy differ
diff --git a/source/tests/pt/model/water/data/data_1/type.raw b/source/tests/pt/model/water/data/data_1/type.raw
new file mode 100644
index 0000000000..97e8fdfcf8
--- /dev/null
+++ b/source/tests/pt/model/water/data/data_1/type.raw
@@ -0,0 +1,192 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/source/tests/pt/model/water/data/data_1/type_map.raw b/source/tests/pt/model/water/data/data_1/type_map.raw
new file mode 100644
index 0000000000..e900768b1d
--- /dev/null
+++ b/source/tests/pt/model/water/data/data_1/type_map.raw
@@ -0,0 +1,2 @@
+O
+H
diff --git a/source/tests/pt/test_fitting_stat.py b/source/tests/pt/test_fitting_stat.py
index bc02b539a0..7807523221 100644
--- a/source/tests/pt/test_fitting_stat.py
+++ b/source/tests/pt/test_fitting_stat.py
@@ -1,18 +1,52 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import shutil
+import tempfile
 import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+from typing import (
+    NoReturn,
+)
 
+import h5py
 import numpy as np
+import torch
 
+from deepmd.pt.entrypoints.main import (
+    get_trainer,
+)
 from deepmd.pt.model.descriptor import (
     DescrptSeA,
 )
 from deepmd.pt.model.task import (
     EnergyFittingNet,
 )
+from deepmd.pt.utils.multi_task import (
+    preprocess_shared_params,
+)
 from deepmd.pt.utils.utils import (
     to_numpy_array,
     to_torch_tensor,
 )
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+from .model.test_permutation import (
+    model_se_e2_a,
+)
 
 
 def _make_fake_data_pt(sys_natoms, sys_nframes, avgs, stds):
@@ -71,16 +105,18 @@ def _brute_aparam_pt(data, ndim):
 
 
 class TestEnerFittingStat(unittest.TestCase):
+    def tearDown(self) -> None:
+        self.tempdir.cleanup()
+
+    def setUp(self) -> None:
+        self.tempdir = tempfile.TemporaryDirectory()
+        h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve())
+        with h5py.File(h5file, "w") as f:
+            pass
+        self.stat_file_path = DPPath(h5file, "a")
+
     def test(self) -> None:
         descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
-        fitting = EnergyFittingNet(
-            descrpt.get_ntypes(),
-            descrpt.get_dim_out(),
-            neuron=[240, 240, 240],
-            resnet_dt=True,
-            numb_fparam=3,
-            numb_aparam=3,
-        )
         avgs = [0, 10, 100]
         stds = [2, 0.4, 0.00001]
         sys_natoms = [10, 100]
@@ -88,11 +124,23 @@ def test(self) -> None:
         all_data = _make_fake_data_pt(sys_natoms, sys_nframes, avgs, stds)
         frefa, frefs = _brute_fparam_pt(all_data, len(avgs))
         arefa, arefs = _brute_aparam_pt(all_data, len(avgs))
-        fitting.compute_input_stats(all_data, protection=1e-2)
         frefs_inv = 1.0 / frefs
         arefs_inv = 1.0 / arefs
         frefs_inv[frefs_inv > 100] = 100
         arefs_inv[arefs_inv > 100] = 100
+
+        # 1. test fitting stat is applied
+        fitting = EnergyFittingNet(
+            descrpt.get_ntypes(),
+            descrpt.get_dim_out(),
+            neuron=[240, 240, 240],
+            resnet_dt=True,
+            numb_fparam=3,
+            numb_aparam=3,
+        )
+        fitting.compute_input_stats(
+            all_data, protection=1e-2, stat_file_path=self.stat_file_path
+        )
         np.testing.assert_almost_equal(frefa, to_numpy_array(fitting.fparam_avg))
         np.testing.assert_almost_equal(
             frefs_inv, to_numpy_array(fitting.fparam_inv_std)
@@ -101,3 +149,347 @@ def test(self) -> None:
         np.testing.assert_almost_equal(
             arefs_inv, to_numpy_array(fitting.aparam_inv_std)
         )
+        del fitting
+
+        # 2. test fitting stat writing to file is correct
+        concat_fparam = np.concatenate(
+            [
+                to_numpy_array(all_data[ii]["fparam"].reshape(-1, 3))
+                for ii in range(len(sys_nframes))
+            ]
+        )
+        concat_aparam = np.concatenate(
+            [
+                to_numpy_array(all_data[ii]["aparam"].reshape(-1, 3))
+                for ii in range(len(sys_nframes))
+            ]
+        )
+        fparam_stat = (self.stat_file_path / "fparam").load_numpy()
+        aparam_stat = (self.stat_file_path / "aparam").load_numpy()
+        np.testing.assert_almost_equal(
+            fparam_stat[:, 0], np.array([concat_fparam.shape[0]] * 3)
+        )
+        np.testing.assert_almost_equal(fparam_stat[:, 1], np.sum(concat_fparam, axis=0))
+        np.testing.assert_almost_equal(
+            fparam_stat[:, 2], np.sum(concat_fparam**2, axis=0)
+        )
+        np.testing.assert_almost_equal(
+            aparam_stat[:, 0], np.array([concat_aparam.shape[0]] * 3)
+        )
+        np.testing.assert_almost_equal(aparam_stat[:, 1], np.sum(concat_aparam, axis=0))
+        np.testing.assert_almost_equal(
+            aparam_stat[:, 2], np.sum(concat_aparam**2, axis=0)
+        )
+
+        # 3. test fitting stat load from file
+        def raise_error() -> NoReturn:
+            raise RuntimeError
+
+        fitting = EnergyFittingNet(
+            descrpt.get_ntypes(),
+            descrpt.get_dim_out(),
+            neuron=[240, 240, 240],
+            resnet_dt=True,
+            numb_fparam=3,
+            numb_aparam=3,
+        )
+        fitting.compute_input_stats(
+            raise_error, protection=1e-2, stat_file_path=self.stat_file_path
+        )
+        np.testing.assert_almost_equal(frefa, to_numpy_array(fitting.fparam_avg))
+        np.testing.assert_almost_equal(
+            frefs_inv, to_numpy_array(fitting.fparam_inv_std)
+        )
+        np.testing.assert_almost_equal(arefa, to_numpy_array(fitting.aparam_avg))
+        np.testing.assert_almost_equal(
+            arefs_inv, to_numpy_array(fitting.aparam_inv_std)
+        )
+
+
+def get_weighted_fitting_stat(model_prob: list, *stat_arrays, protection: float):
+    """Return the model_prob-weighted mean and std over several stat arrays.
+
+    Per-array frame counts, sums and squared sums (along axis 0) are each
+    weighted by the matching ``model_prob`` entry before being combined; std
+    values below ``protection`` are replaced by ``protection``.
+    """
+    assert len(model_prob) == len(stat_arrays)
+    weighted = list(zip(model_prob, stat_arrays))
+    total_frames = sum(prob * stat.shape[0] for prob, stat in weighted)
+    first_moment = sum(prob * stat.sum(axis=0) for prob, stat in weighted)
+    second_moment = sum(prob * (stat**2).sum(axis=0) for prob, stat in weighted)
+
+    weighted_avg = first_moment / total_frames
+    variance = second_moment / total_frames - weighted_avg**2
+    weighted_std = np.sqrt(variance)
+    weighted_std = np.where(weighted_std < protection, protection, weighted_std)
+    return weighted_avg, weighted_std
+
+
+class TestMultiTaskFittingStat(unittest.TestCase):
+    def setUp(self) -> None:
+        multitask_sharefit_template_json = str(
+            Path(__file__).parent / "water/multitask_sharefit.json"
+        )
+        with open(multitask_sharefit_template_json) as f:
+            multitask_se_e2_a = json.load(f)
+        multitask_se_e2_a["model"]["shared_dict"]["my_descriptor"] = model_se_e2_a[
+            "descriptor"
+        ]
+        self.data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.data_file_without_fparam = [
+            str(Path(__file__).parent / "water/data/data_1")
+        ]
+        self.data_file_single = [str(Path(__file__).parent / "water/data/single")]
+        self.stat_files = "se_e2_a_share_fit"
+        os.makedirs(self.stat_files, exist_ok=True)
+
+        self.config = multitask_se_e2_a
+        self.config["training"]["data_dict"]["model_1"]["stat_file"] = (
+            f"{self.stat_files}/model_1"
+        )
+        self.config["training"]["data_dict"]["model_2"]["stat_file"] = (
+            f"{self.stat_files}/model_2"
+        )
+        self.config["model"]["shared_dict"]["my_fitting"]["numb_fparam"] = 2
+        self.default_fparam = [1.0, 0.0]
+        self.config["model"]["shared_dict"]["my_fitting"]["default_fparam"] = (
+            self.default_fparam
+        )
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+        self.origin_config = deepcopy(self.config)
+
+    def test_sharefitting_with_fparam(self):
+        # test multitask training with fparam
+        self.config = deepcopy(self.origin_config)
+        model_prob = [0.3, 0.7]
+        self.config["training"]["model_prob"]["model_1"] = model_prob[0]
+        self.config["training"]["model_prob"]["model_2"] = model_prob[1]
+
+        self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = (
+            self.data_file
+        )
+        self.config["training"]["data_dict"]["model_1"]["validation_data"][
+            "systems"
+        ] = self.data_file
+        self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = (
+            self.data_file_single
+        )
+        self.config["training"]["data_dict"]["model_2"]["validation_data"][
+            "systems"
+        ] = self.data_file_single
+        self.config["model"]["model_dict"]["model_1"]["data_stat_nbatch"] = 100
+
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
+        self.config = update_deepmd_input(self.config, warning=True)
+        self.config = normalize(self.config, multi_task=True)
+        trainer = get_trainer(deepcopy(self.config), shared_links=self.shared_links)
+        trainer.run()
+
+        # check fparam shared
+        multi_state_dict = trainer.wrapper.model.state_dict()
+        torch.testing.assert_close(
+            multi_state_dict["model_1.atomic_model.fitting_net.fparam_avg"],
+            multi_state_dict["model_2.atomic_model.fitting_net.fparam_avg"],
+        )
+        torch.testing.assert_close(
+            multi_state_dict["model_1.atomic_model.fitting_net.fparam_inv_std"],
+            multi_state_dict["model_2.atomic_model.fitting_net.fparam_inv_std"],
+        )
+
+        # check fitting stat in stat_file is correct
+        fparam_stat_model1 = np.load(f"{self.stat_files}/model_1/O H B/fparam")
+        fparam_stat_model2 = np.load(f"{self.stat_files}/model_2/O H B/fparam")
+        fparam_data1 = np.load(f"{self.data_file[0]}/set.000/fparam.npy")
+        fparam_data2 = np.load(f"{self.data_file_single[0]}/set.000/fparam.npy")
+        np.testing.assert_almost_equal(
+            fparam_stat_model1[:, 0], [fparam_data1.shape[0]] * 2
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model1[:, 1], fparam_data1.sum(axis=0)
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model1[:, 2], (fparam_data1**2).sum(axis=0)
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model2[:, 0], [fparam_data2.shape[0]] * 2
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model2[:, 1], fparam_data2.sum(axis=0)
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model2[:, 2], (fparam_data2**2).sum(axis=0)
+        )
+
+        # check shared fitting stat is computed correctly
+        weighted_avg, weighted_std = get_weighted_fitting_stat(
+            model_prob, fparam_data1, fparam_data2, protection=1e-2
+        )
+        np.testing.assert_almost_equal(
+            weighted_avg,
+            to_numpy_array(
+                multi_state_dict["model_1.atomic_model.fitting_net.fparam_avg"]
+            ),
+        )
+        np.testing.assert_almost_equal(
+            1 / weighted_std,
+            to_numpy_array(
+                multi_state_dict["model_1.atomic_model.fitting_net.fparam_inv_std"]
+            ),
+        )
+
+    def test_sharefitting_using_default_fparam(self):
+        # test multitask training where data without fparam uses default_fparam
+        self.config = deepcopy(self.origin_config)
+        # add model3
+        self.config["model"]["model_dict"]["model_3"] = deepcopy(
+            self.config["model"]["model_dict"]["model_2"]
+        )
+        self.config["loss_dict"]["model_3"] = deepcopy(
+            self.config["loss_dict"]["model_2"]
+        )
+        self.config["training"]["model_prob"]["model_3"] = deepcopy(
+            self.config["training"]["model_prob"]["model_2"]
+        )
+        self.config["training"]["data_dict"]["model_3"] = deepcopy(
+            self.config["training"]["data_dict"]["model_2"]
+        )
+        self.config["training"]["data_dict"]["model_3"]["stat_file"] = self.config[
+            "training"
+        ]["data_dict"]["model_3"]["stat_file"].replace("model_2", "model_3")
+        self.config["model"]["shared_dict"]["my_fitting"]["dim_case_embd"] = 3
+
+        model_prob = [0.1, 0.3, 0.6]
+        self.config["training"]["model_prob"]["model_1"] = model_prob[0]
+        self.config["training"]["model_prob"]["model_2"] = model_prob[1]
+        self.config["training"]["model_prob"]["model_3"] = model_prob[2]
+
+        self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = (
+            self.data_file_without_fparam
+        )
+        self.config["training"]["data_dict"]["model_1"]["validation_data"][
+            "systems"
+        ] = self.data_file_without_fparam
+        self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = (
+            self.data_file_single
+        )
+        self.config["training"]["data_dict"]["model_2"]["validation_data"][
+            "systems"
+        ] = self.data_file_single
+        self.config["training"]["data_dict"]["model_3"]["stat_file"] = (
+            f"{self.stat_files}/model_3"
+        )
+        self.config["training"]["data_dict"]["model_3"]["training_data"]["systems"] = (
+            self.data_file
+        )
+        self.config["training"]["data_dict"]["model_3"]["validation_data"][
+            "systems"
+        ] = self.data_file
+        data_stat_protect = 5e-3
+        self.config["model"]["model_dict"]["model_1"]["data_stat_nbatch"] = 3
+        self.config["model"]["model_dict"]["model_3"]["data_stat_nbatch"] = 100
+        self.config["model"]["model_dict"]["model_1"]["data_stat_protect"] = (
+            data_stat_protect
+        )
+        self.config["model"]["model_dict"]["model_2"]["data_stat_protect"] = (
+            data_stat_protect
+        )
+        self.config["model"]["model_dict"]["model_3"]["data_stat_protect"] = (
+            data_stat_protect
+        )
+
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
+        self.config = update_deepmd_input(self.config, warning=True)
+        self.config = normalize(self.config, multi_task=True)
+        trainer = get_trainer(deepcopy(self.config), shared_links=self.shared_links)
+        trainer.run()
+
+        # check fparam shared
+        multi_state_dict = trainer.wrapper.model.state_dict()
+        torch.testing.assert_close(
+            multi_state_dict["model_1.atomic_model.fitting_net.fparam_avg"],
+            multi_state_dict["model_2.atomic_model.fitting_net.fparam_avg"],
+        )
+        torch.testing.assert_close(
+            multi_state_dict["model_1.atomic_model.fitting_net.fparam_avg"],
+            multi_state_dict["model_3.atomic_model.fitting_net.fparam_avg"],
+        )
+        torch.testing.assert_close(
+            multi_state_dict["model_1.atomic_model.fitting_net.fparam_inv_std"],
+            multi_state_dict["model_2.atomic_model.fitting_net.fparam_inv_std"],
+        )
+        torch.testing.assert_close(
+            multi_state_dict["model_1.atomic_model.fitting_net.fparam_inv_std"],
+            multi_state_dict["model_3.atomic_model.fitting_net.fparam_inv_std"],
+        )
+
+        # check fitting stat in stat_file is correct
+        fparam_stat_model1 = np.load(f"{self.stat_files}/model_1/O H B/fparam")
+        fparam_stat_model2 = np.load(f"{self.stat_files}/model_2/O H B/fparam")
+        fparam_stat_model3 = np.load(f"{self.stat_files}/model_3/O H B/fparam")
+        fparam_data1 = np.array([self.default_fparam]).repeat(3, axis=0)
+        fparam_data2 = np.load(f"{self.data_file_single[0]}/set.000/fparam.npy")
+        fparam_data3 = np.load(f"{self.data_file[0]}/set.000/fparam.npy")
+        np.testing.assert_almost_equal(
+            fparam_stat_model1[:, 0], [fparam_data1.shape[0]] * 2
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model1[:, 1], fparam_data1.sum(axis=0)
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model1[:, 2], (fparam_data1**2).sum(axis=0)
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model2[:, 0], [fparam_data2.shape[0]] * 2
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model2[:, 1], fparam_data2.sum(axis=0)
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model2[:, 2], (fparam_data2**2).sum(axis=0)
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model3[:, 0], [fparam_data3.shape[0]] * 2
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model3[:, 1], fparam_data3.sum(axis=0)
+        )
+        np.testing.assert_almost_equal(
+            fparam_stat_model3[:, 2], (fparam_data3**2).sum(axis=0)
+        )
+
+        # check shared fitting stat is computed correctly
+        weighted_avg, weighted_std = get_weighted_fitting_stat(
+            model_prob,
+            fparam_data1,
+            fparam_data2,
+            fparam_data3,
+            protection=data_stat_protect,
+        )
+        np.testing.assert_almost_equal(
+            weighted_avg,
+            to_numpy_array(
+                multi_state_dict["model_1.atomic_model.fitting_net.fparam_avg"]
+            ),
+        )
+        np.testing.assert_almost_equal(
+            1 / weighted_std,
+            to_numpy_array(
+                multi_state_dict["model_1.atomic_model.fitting_net.fparam_inv_std"]
+            ),
+        )
+
+    def tearDown(self) -> None:
+        """Clean the CWD: ``model*.pt``, ``lcurve.out``, ``checkpoint``, stat files."""
+        for entry in os.listdir("."):
+            is_model_pt = entry.startswith("model") and entry.endswith(".pt")
+            if is_model_pt or entry in ("lcurve.out", "checkpoint"):
+                os.remove(entry)
+            if entry == self.stat_files:
+                shutil.rmtree(entry)
diff --git a/source/tests/pt/test_loss_tensor.py b/source/tests/pt/test_loss_tensor.py
index 5802c0b775..67dcb568e1 100644
--- a/source/tests/pt/test_loss_tensor.py
+++ b/source/tests/pt/test_loss_tensor.py
@@ -57,7 +57,7 @@ def get_single_batch(dataset, index=None):
         if key in np_batch.keys():
             np_batch[key] = np.expand_dims(np_batch[key], axis=0)
             pt_batch[key] = torch.as_tensor(np_batch[key], device=env.DEVICE)
-            if key in ["coord", "atom_dipole"]:
+            if key in ["coord", "atom_dipole", "atom_polarizability"]:
                 np_batch[key] = np_batch[key].reshape(1, -1)
     np_batch["natoms"] = np_batch["natoms"][0]
     return np_batch, pt_batch
diff --git a/source/tests/pt/test_model_compression_se_atten.py b/source/tests/pt/test_model_compression_se_atten.py
new file mode 100644
index 0000000000..41818f81d7
--- /dev/null
+++ b/source/tests/pt/test_model_compression_se_atten.py
@@ -0,0 +1,1019 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import shutil
+import unittest
+
+import numpy as np
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.infer.deep_eval import (
+    DeepEval,
+)
+
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+# Decimal places required by the almost-equal assertions in this module.
+# The requirement is relaxed from 10 to 4 places when the global NumPy
+# float precision is float32.
+default_places = 4 if GLOBAL_NP_FLOAT_PRECISION == np.float32 else 10
+
+
+def _file_delete(file) -> None:
+    """Remove ``file`` if it is a directory or a regular file; otherwise do nothing."""
+    is_directory = os.path.isdir(file)
+    if is_directory or os.path.isfile(file):
+        (shutil.rmtree if is_directory else os.remove)(file)
+
+
+def _init_models():
+    """Train, freeze and compress an se_atten_v2 model with ``attn_layer=0``.
+
+    Returns ``(input_json, frozen_model, compressed_model)`` as path strings.
+    """
+    descriptor = {
+        "type": "se_atten_v2",
+        "sel": 120,
+        "rcut_smth": 0.50,
+        "rcut": 6.00,
+        "neuron": [25, 50, 100],
+        "resnet_dt": False,
+        "axis_neuron": 16,
+        "seed": 1,
+        "attn": 128,
+        # attn_layer=0 allows full compression (type embedding and geometric parts)
+        "attn_layer": 0,
+        "attn_dotr": True,
+        "attn_mask": False,
+        "precision": "float64",
+    }
+    input_json = str(tests_path / "input.json")
+    frozen = str(tests_path / "dp-original-se-atten.pth")
+    compressed = str(tests_path / "dp-compressed-se-atten.pth")
+
+    conf = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    conf["model"]["descriptor"] = descriptor
+    conf["training"]["training_data"]["systems"] = str(
+        tests_path / os.path.join("model_compression", "data")
+    )
+    with open(input_json, "w") as fp:
+        json.dump(conf, fp, indent=4)
+    steps = (
+        (f"dp --pt train {input_json}", "DP train failed!"),
+        (f"dp --pt freeze -o {frozen}", "DP freeze failed!"),
+        (
+            f"dp --pt compress  -i {frozen} -o {compressed} -t {input_json}",
+            "DP model compression failed!",
+        ),
+    )
+    for command, failure_message in steps:
+        np.testing.assert_equal(run_dp(command), 0, failure_message)
+    return input_json, frozen, compressed
+
+
+def _init_models_exclude_types():
+    """Train, freeze and compress an se_atten_v2 model that sets ``exclude_types``.
+
+    Returns ``(input_json, frozen_model, compressed_model)`` as path strings.
+    """
+    descriptor = {
+        "type": "se_atten_v2",
+        # the setting this variant exists to exercise
+        "exclude_types": [[0, 1]],
+        "sel": 120,
+        "rcut_smth": 0.50,
+        "rcut": 6.00,
+        "neuron": [25, 50, 100],
+        "resnet_dt": False,
+        "axis_neuron": 16,
+        "seed": 1,
+        "attn": 128,
+        "attn_layer": 0,
+        "attn_dotr": True,
+        "attn_mask": False,
+        "precision": "float64",
+    }
+    input_json = str(tests_path / "input.json")
+    frozen = str(tests_path / "dp-original-se-atten-exclude-types.pth")
+    compressed = str(tests_path / "dp-compressed-se-atten-exclude-types.pth")
+
+    conf = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    conf["model"]["descriptor"] = descriptor
+    conf["training"]["training_data"]["systems"] = str(
+        tests_path / os.path.join("model_compression", "data")
+    )
+    with open(input_json, "w") as fp:
+        json.dump(conf, fp, indent=4)
+    steps = (
+        (f"dp --pt train {input_json}", "DP train failed!"),
+        (f"dp --pt freeze -o {frozen}", "DP freeze failed!"),
+        (
+            f"dp --pt compress  -i {frozen} -o {compressed} -t {input_json}",
+            "DP model compression failed!",
+        ),
+    )
+    for command, failure_message in steps:
+        np.testing.assert_equal(run_dp(command), 0, failure_message)
+    return input_json, frozen, compressed
+
+
+def _init_models_nonzero_attn_layer():
+    """Train, freeze and compress an se_atten_v2 model with ``attn_layer=2``.
+
+    With a non-zero ``attn_layer`` only the type embedding is compressed; the
+    geometric parts (attention layers) are not.
+
+    Returns ``(input_json, frozen_model, compressed_model)`` as path strings.
+    """
+    descriptor = {
+        "type": "se_atten_v2",
+        "sel": 120,
+        "rcut_smth": 0.50,
+        "rcut": 6.00,
+        "neuron": [25, 50, 100],
+        "resnet_dt": False,
+        "axis_neuron": 16,
+        "seed": 1,
+        "attn": 128,
+        "attn_layer": 2,
+        "attn_dotr": True,
+        "attn_mask": False,
+        "precision": "float64",
+    }
+    tag = "-nonzero-attn"
+    input_json = str(tests_path / f"input{tag}.json")
+    frozen = str(tests_path / f"dp-original-se-atten{tag}.pth")
+    compressed = str(tests_path / f"dp-compressed-se-atten{tag}.pth")
+
+    conf = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    conf["model"]["descriptor"] = descriptor
+    conf["training"]["training_data"]["systems"] = str(
+        tests_path / os.path.join("model_compression", "data")
+    )
+    with open(input_json, "w") as fp:
+        json.dump(conf, fp, indent=4)
+    steps = (
+        (f"dp --pt train {input_json}", "DP train failed!"),
+        (f"dp --pt freeze -o {frozen}", "DP freeze failed!"),
+        (
+            f"dp --pt compress  -i {frozen} -o {compressed} -t {input_json}",
+            "DP model compression failed!",
+        ),
+    )
+    for command, failure_message in steps:
+        np.testing.assert_equal(run_dp(command), 0, failure_message)
+    return input_json, frozen, compressed
+
+
+def _init_models_skip_neighbor_stat():
+    """Train with ``--skip-neighbor-stat``, then freeze and compress the model.
+
+    Returns ``(input_json, frozen_model, compressed_model)`` as path strings.
+    """
+    # the variant-specific part is the extra flag on the train command below
+    descriptor = {
+        "type": "se_atten_v2",
+        "sel": 120,
+        "rcut_smth": 0.50,
+        "rcut": 6.00,
+        "neuron": [25, 50, 100],
+        "resnet_dt": False,
+        "axis_neuron": 16,
+        "seed": 1,
+        "attn": 128,
+        "attn_layer": 0,
+        "attn_dotr": True,
+        "attn_mask": False,
+        "precision": "float64",
+    }
+    tag = "-skip-neighbor-stat"
+    input_json = str(tests_path / "input.json")
+    frozen = str(tests_path / f"dp-original-se-atten{tag}.pth")
+    compressed = str(tests_path / f"dp-compressed-se-atten{tag}.pth")
+
+    conf = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    conf["model"]["descriptor"] = descriptor
+    conf["training"]["training_data"]["systems"] = str(
+        tests_path / os.path.join("model_compression", "data")
+    )
+    with open(input_json, "w") as fp:
+        json.dump(conf, fp, indent=4)
+    steps = (
+        (f"dp --pt train {input_json} --skip-neighbor-stat", "DP train failed!"),
+        (f"dp --pt freeze -o {frozen}", "DP freeze failed!"),
+        (
+            f"dp --pt compress  -i {frozen} -o {compressed} -t {input_json}",
+            "DP model compression failed!",
+        ),
+    )
+    for command, failure_message in steps:
+        np.testing.assert_equal(run_dp(command), 0, failure_message)
+    return input_json, frozen, compressed
+
+
+def setUpModule() -> None:
+    """Train, freeze and compress each model variant once for the whole module.
+
+    The resulting file paths are published as module-level globals that the
+    test classes and ``tearDownModule`` read.
+    """
+    global INPUT, FROZEN_MODEL, COMPRESSED_MODEL
+    global INPUT_ET, FROZEN_MODEL_ET, COMPRESSED_MODEL_ET
+    global INPUT_NONZERO_ATTN, FROZEN_MODEL_NONZERO_ATTN, COMPRESSED_MODEL_NONZERO_ATTN
+    global FROZEN_MODEL_SKIP_NEIGHBOR_STAT, COMPRESSED_MODEL_SKIP_NEIGHBOR_STAT
+
+    INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
+    # the input path of the skip-neighbor-stat run is not kept as a global
+    _, frozen_skip, compressed_skip = _init_models_skip_neighbor_stat()
+    FROZEN_MODEL_SKIP_NEIGHBOR_STAT = frozen_skip
+    COMPRESSED_MODEL_SKIP_NEIGHBOR_STAT = compressed_skip
+    INPUT_ET, FROZEN_MODEL_ET, COMPRESSED_MODEL_ET = _init_models_exclude_types()
+    nonzero_attn_paths = _init_models_nonzero_attn_layer()
+    INPUT_NONZERO_ATTN, FROZEN_MODEL_NONZERO_ATTN, COMPRESSED_MODEL_NONZERO_ATTN = (
+        nonzero_attn_paths
+    )
+
+
+def tearDownModule() -> None:
+    """Delete the files made by the ``_init_models*`` helpers and other artifacts."""
+    generated = (
+        # _init_models
+        INPUT,
+        FROZEN_MODEL,
+        COMPRESSED_MODEL,
+        # _init_models_skip_neighbor_stat
+        FROZEN_MODEL_SKIP_NEIGHBOR_STAT,
+        COMPRESSED_MODEL_SKIP_NEIGHBOR_STAT,
+        # _init_models_exclude_types
+        INPUT_ET,
+        FROZEN_MODEL_ET,
+        COMPRESSED_MODEL_ET,
+        # _init_models_nonzero_attn_layer
+        INPUT_NONZERO_ATTN,
+        FROZEN_MODEL_NONZERO_ATTN,
+        COMPRESSED_MODEL_NONZERO_ATTN,
+    )
+    leftovers = ("out.json", "input_v2_compat.json", "checkpoint", "lcurve.out")
+    saved_models = ("model.ckpt.pt", "model.ckpt-1.pt")
+    # the nested entry is removed before its parent path
+    compression_paths = ("model-compression/checkpoint", "model-compression")
+    for path in (*generated, *leftovers, *saved_models, *compression_paths):
+        _file_delete(path)
+
+
+class TestDeepPotATPBC(unittest.TestCase):
+    """Compare the original and compressed models on a six-atom frame with a box.
+
+    Each evaluation test runs both models on identical input and requires the
+    expected output shapes and values that agree to ``default_places`` decimals.
+    """
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load both models once and define the shared coordinates, types and box."""
+        cls.dp_original = DeepEval(FROZEN_MODEL)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL)
+        cls.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                0.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        cls.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+
+    def test_attrs(self) -> None:
+        """Both models report ntypes 2, rcut 6.0, type map O/H and zero param dims."""
+        for model in (self.dp_original, self.dp_compressed):
+            self.assertEqual(model.get_ntypes(), 2)
+            self.assertAlmostEqual(model.get_rcut(), 6.0, places=default_places)
+            self.assertEqual(model.get_type_map(), ["O", "H"])
+            self.assertEqual(model.get_dim_fparam(), 0)
+            self.assertEqual(model.get_dim_aparam(), 0)
+
+    def test_1frame(self) -> None:
+        """Evaluate a single frame with ``atomic=False``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 1, len(self.atype)
+        expected_shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        e0, f0, v0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        e1, f1, v1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0), (e1, f1, v1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, e, v
+        for ref, out in ((f0, f1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+    def test_1frame_atm(self) -> None:
+        """Evaluate a single frame with ``atomic=True``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 1, len(self.atype)
+        expected_shapes = (
+            (nframes, 1),
+            (nframes, natoms, 3),
+            (nframes, 9),
+            (nframes, natoms, 1),
+            (nframes, natoms, 9),
+        )
+        e0, f0, v0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        e1, f1, v1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0, ae0, av0), (e1, f1, v1, ae1, av1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, ae, av, e, v
+        for ref, out in ((f0, f1), (ae0, ae1), (av0, av1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+    def test_2frame_atm(self) -> None:
+        """Evaluate two stacked copies of the frame with ``atomic=True``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 2, len(self.atype)
+        expected_shapes = (
+            (nframes, 1),
+            (nframes, natoms, 3),
+            (nframes, 9),
+            (nframes, natoms, 1),
+            (nframes, natoms, 9),
+        )
+        coords2 = np.concatenate((self.coords, self.coords))
+        box2 = np.concatenate((self.box, self.box))
+        e0, f0, v0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        e1, f1, v1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0, ae0, av0), (e1, f1, v1, ae1, av1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, ae, av, e, v
+        for ref, out in ((f0, f1), (ae0, ae1), (av0, av1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+
+class TestDeepPotATNoPBC(unittest.TestCase):
+    """Compare the original and compressed models when ``box`` is ``None``."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load both models once and define the shared coordinates and types."""
+        cls.dp_original = DeepEval(FROZEN_MODEL)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL)
+        cls.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                0.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        cls.box = None
+
+    def test_1frame(self) -> None:
+        """Evaluate a single frame with ``atomic=False``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 1, len(self.atype)
+        expected_shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        e0, f0, v0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        e1, f1, v1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0), (e1, f1, v1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, e, v
+        for ref, out in ((f0, f1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+    def test_1frame_atm(self) -> None:
+        """Evaluate a single frame with ``atomic=True``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 1, len(self.atype)
+        expected_shapes = (
+            (nframes, 1),
+            (nframes, natoms, 3),
+            (nframes, 9),
+            (nframes, natoms, 1),
+            (nframes, natoms, 9),
+        )
+        e0, f0, v0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        e1, f1, v1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0, ae0, av0), (e1, f1, v1, ae1, av1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, ae, av, e, v
+        for ref, out in ((f0, f1), (ae0, ae1), (av0, av1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+    def test_2frame_atm(self) -> None:
+        """Evaluate two stacked copies of the frame with ``atomic=True``."""
+        nframes, natoms = 2, len(self.atype)
+        expected_shapes = (
+            (nframes, 1),
+            (nframes, natoms, 3),
+            (nframes, 9),
+            (nframes, natoms, 1),
+            (nframes, natoms, 9),
+        )
+        # only the coordinates are stacked; ``self.box`` is ``None`` in this class
+        coords2 = np.concatenate((self.coords, self.coords))
+        e0, f0, v0, ae0, av0 = self.dp_original.eval(
+            coords2, self.box, self.atype, atomic=True
+        )
+        e1, f1, v1, ae1, av1 = self.dp_compressed.eval(
+            coords2, self.box, self.atype, atomic=True
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0, ae0, av0), (e1, f1, v1, ae1, av1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, ae, av, e, v
+        for ref, out in ((f0, f1), (ae0, ae1), (av0, av1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+
+class TestDeepPotATLargeBoxNoPBC(unittest.TestCase):
+    """Compare the original and compressed models with the cell diag(19, 13, 13)."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load both models once and define the shared coordinates, types and box."""
+        cls.dp_original = DeepEval(FROZEN_MODEL)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL)
+        cls.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                0.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        cls.box = np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+
+    def test_1frame(self) -> None:
+        """Evaluate a single frame with ``atomic=False``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 1, len(self.atype)
+        expected_shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        e0, f0, v0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        e1, f1, v1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0), (e1, f1, v1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, e, v
+        for ref, out in ((f0, f1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+    def test_1frame_atm(self) -> None:
+        """Evaluate a single frame with ``atomic=True``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 1, len(self.atype)
+        expected_shapes = (
+            (nframes, 1),
+            (nframes, natoms, 3),
+            (nframes, 9),
+            (nframes, natoms, 1),
+            (nframes, natoms, 9),
+        )
+        e0, f0, v0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        e1, f1, v1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0, ae0, av0), (e1, f1, v1, ae1, av1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, ae, av, e, v
+        for ref, out in ((f0, f1), (ae0, ae1), (av0, av1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+    def test_ase(self) -> None:
+        """Compare energies and forces obtained through the ASE ``DP`` calculator."""
+        from ase import (
+            Atoms,
+        )
+
+        from deepmd.calculator import (
+            DP,
+        )
+
+        # both Atoms objects (and their calculators) are created before any evaluation
+        water0, water1 = (
+            Atoms(
+                "OHHOHH",
+                positions=self.coords.reshape((-1, 3)),
+                cell=self.box.reshape((3, 3)),
+                calculator=DP(model_file),
+            )
+            for model_file in (FROZEN_MODEL, COMPRESSED_MODEL)
+        )
+        energy0, forces0 = water0.get_potential_energy(), water0.get_forces()
+        energy1, forces1 = water1.get_potential_energy(), water1.get_forces()
+        # forces are compared first, then energies
+        np.testing.assert_almost_equal(forces0, forces1, default_places)
+        np.testing.assert_almost_equal(energy0, energy1, default_places)
+
+
+class TestDeepPotATPBCExcludeTypes(unittest.TestCase):
+    """Compare the original and compressed ``*_ET`` models on a frame with a box.
+
+    These are the models produced by ``_init_models_exclude_types``. Each
+    evaluation test runs both on identical input and checks shapes, then values.
+    """
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load both models once and define the shared coordinates, types and box."""
+        cls.dp_original = DeepEval(FROZEN_MODEL_ET)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL_ET)
+        cls.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                0.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        cls.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+
+    def test_attrs(self) -> None:
+        """Both models report ntypes 2, rcut 6.0, type map O/H and zero param dims."""
+        for model in (self.dp_original, self.dp_compressed):
+            self.assertEqual(model.get_ntypes(), 2)
+            self.assertAlmostEqual(model.get_rcut(), 6.0, places=default_places)
+            self.assertEqual(model.get_type_map(), ["O", "H"])
+            self.assertEqual(model.get_dim_fparam(), 0)
+            self.assertEqual(model.get_dim_aparam(), 0)
+
+    def test_1frame(self) -> None:
+        """Evaluate a single frame with ``atomic=False``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 1, len(self.atype)
+        expected_shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        e0, f0, v0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        e1, f1, v1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0), (e1, f1, v1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, e, v
+        for ref, out in ((f0, f1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+    def test_1frame_atm(self) -> None:
+        """Evaluate a single frame with ``atomic=True``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 1, len(self.atype)
+        expected_shapes = (
+            (nframes, 1),
+            (nframes, natoms, 3),
+            (nframes, 9),
+            (nframes, natoms, 1),
+            (nframes, natoms, 9),
+        )
+        e0, f0, v0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        e1, f1, v1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0, ae0, av0), (e1, f1, v1, ae1, av1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, ae, av, e, v
+        for ref, out in ((f0, f1), (ae0, ae1), (av0, av1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+    def test_2frame_atm(self) -> None:
+        """Evaluate two stacked copies of the frame with ``atomic=True``.
+
+        All output shapes are asserted before any values are compared.
+        """
+        nframes, natoms = 2, len(self.atype)
+        expected_shapes = (
+            (nframes, 1),
+            (nframes, natoms, 3),
+            (nframes, 9),
+            (nframes, natoms, 1),
+            (nframes, natoms, 9),
+        )
+        coords2 = np.concatenate((self.coords, self.coords))
+        box2 = np.concatenate((self.box, self.box))
+        e0, f0, v0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        e1, f1, v1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        # shapes: original outputs first, then compressed outputs
+        for outputs in ((e0, f0, v0, ae0, av0), (e1, f1, v1, ae1, av1)):
+            for value, shape in zip(outputs, expected_shapes):
+                self.assertEqual(value.shape, shape)
+        # values, compared in the order f, ae, av, e, v
+        for ref, out in ((f0, f1), (ae0, ae1), (av0, av1), (e0, e1), (v0, v1)):
+            np.testing.assert_almost_equal(ref, out, default_places)
+
+
+class TestSkipNeighborStat(unittest.TestCase):
+    """Compare the ``*_SKIP_NEIGHBOR_STAT`` original/compressed model pair."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load the model pair and build the shared six-atom test frame.
+
+        Coordinates are stored flattened, one ``x, y, z`` triple per atom.
+        """
+        cls.dp_original = DeepEval(FROZEN_MODEL_SKIP_NEIGHBOR_STAT)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL_SKIP_NEIGHBOR_STAT)
+        xyz_rows = [
+            [12.83, 2.56, 2.18],
+            [12.09, 2.87, 2.74],
+            [0.25, 3.32, 1.68],
+            [3.36, 3.00, 1.81],
+            [3.51, 2.51, 2.60],
+            [4.27, 3.22, 1.56],
+        ]
+        # Flatten to the 18-element vector the tests pass to ``eval``.
+        cls.coords = np.array(xyz_rows).reshape(-1)
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        # Nine-element box vector, written here as three rows of three.
+        cell_rows = [
+            [13.0, 0.0, 0.0],
+            [0.0, 13.0, 0.0],
+            [0.0, 0.0, 13.0],
+        ]
+        cls.box = np.array(cell_rows).reshape(-1)
+
+    def test_attrs(self) -> None:
+        """Check the metadata reported by the original and compressed models.
+
+        Both models must report the same type count, cutoff radius, type
+        map and fparam/aparam dimensions.
+        """
+        # Same expectations for both models, original first.
+        for dp in (self.dp_original, self.dp_compressed):
+            self.assertEqual(dp.get_ntypes(), 2)
+            # rcut is a float, so it is compared to default_places decimals
+            self.assertAlmostEqual(dp.get_rcut(), 6.0, places=default_places)
+            self.assertEqual(dp.get_type_map(), ["O", "H"])
+            self.assertEqual(dp.get_dim_fparam(), 0)
+            self.assertEqual(dp.get_dim_aparam(), 0)
+
+    def test_1frame(self) -> None:
+        """Compare the non-atomic outputs of both models on a single frame.
+
+        Shapes are checked first, then the values are compared to
+        ``default_places`` decimals.
+        """
+        # Positional arguments shared by both evaluations.
+        frame = (self.coords, self.box, self.atype)
+        e_ref, f_ref, v_ref = self.dp_original.eval(*frame, atomic=False)
+        e_cmp, f_cmp, v_cmp = self.dp_compressed.eval(*frame, atomic=False)
+        # check shape of the returns (same expectations for both models)
+        nframes, natoms = 1, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        outputs = (e_ref, f_ref, v_ref, e_cmp, f_cmp, v_cmp)
+        for out, shape in zip(outputs, shapes * 2):
+            self.assertEqual(out.shape, shape)
+        # check values: f first, then e, then v
+        pairs = ((f_ref, f_cmp), (e_ref, e_cmp), (v_ref, v_cmp))
+        for ref, cmp in pairs:
+            np.testing.assert_almost_equal(ref, cmp, default_places)
+
+    def test_1frame_atm(self) -> None:
+        """Compare the ``atomic=True`` outputs of both models on one frame.
+
+        With ``atomic=True`` each model returns five arrays.  Their shapes
+        are checked first, then the values to ``default_places`` decimals.
+        """
+        # Positional arguments shared by both evaluations.
+        frame = (self.coords, self.box, self.atype)
+        e_ref, f_ref, v_ref, ae_ref, av_ref = self.dp_original.eval(
+            *frame, atomic=True
+        )
+        e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp = self.dp_compressed.eval(
+            *frame, atomic=True
+        )
+        # check shape of the returns; expected shapes are listed in return
+        # order and apply to both models
+        nframes, natoms = 1, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        shapes += ((nframes, natoms, 1), (nframes, natoms, 9))
+        ref = (e_ref, f_ref, v_ref, ae_ref, av_ref)
+        cmp = (e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp)
+        for out, shape in zip(ref + cmp, shapes * 2):
+            self.assertEqual(out.shape, shape)
+        # check values; indices select f, ae, av, e, v in that order
+        for idx in (1, 3, 4, 0, 2):
+            np.testing.assert_almost_equal(ref[idx], cmp[idx], default_places)
+
+    def test_2frame_atm(self) -> None:
+        """Compare the ``atomic=True`` outputs on a two-frame batch.
+
+        The batch is the single test frame concatenated with itself, so
+        every output must have a leading dimension of two.
+        """
+        # Duplicate coordinates and box; atype is passed once for all frames.
+        coords_x2 = np.concatenate((self.coords, self.coords))
+        box_x2 = np.concatenate((self.box, self.box))
+        frames = (coords_x2, box_x2, self.atype)
+        e_ref, f_ref, v_ref, ae_ref, av_ref = self.dp_original.eval(
+            *frames, atomic=True
+        )
+        e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp = self.dp_compressed.eval(
+            *frames, atomic=True
+        )
+        # check shape of the returns; expected shapes are listed in return
+        # order and apply to both models
+        nframes, natoms = 2, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        shapes += ((nframes, natoms, 1), (nframes, natoms, 9))
+        ref = (e_ref, f_ref, v_ref, ae_ref, av_ref)
+        cmp = (e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp)
+        for out, shape in zip(ref + cmp, shapes * 2):
+            self.assertEqual(out.shape, shape)
+
+        # check values; indices select f, ae, av, e, v in that order
+        for idx in (1, 3, 4, 0, 2):
+            np.testing.assert_almost_equal(ref[idx], cmp[idx], default_places)
+
+
+class TestDeepPotATNonZeroAttnLayer(unittest.TestCase):
+    """Test model compression with attn_layer > 0 (partial compression)."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load the model pair and build the shared six-atom test frame.
+
+        Coordinates are stored flattened, one ``x, y, z`` triple per atom.
+        """
+        # Reference (frozen) model and its compressed counterpart.
+        cls.dp_original = DeepEval(FROZEN_MODEL_NONZERO_ATTN)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL_NONZERO_ATTN)
+        xyz_rows = [
+            [12.83, 2.56, 2.18],
+            [12.09, 2.87, 2.74],
+            [0.25, 3.32, 1.68],
+            [3.36, 3.00, 1.81],
+            [3.51, 2.51, 2.60],
+            [4.27, 3.22, 1.56],
+        ]
+        # Flatten to the 18-element vector the tests pass to ``eval``.
+        cls.coords = np.array(xyz_rows).reshape(-1)
+        # Per-atom type indices for the six atoms.
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        # Row-major 3x3 cell (see the reshape in test_ase), stored flattened.
+        cell_rows = [
+            [13.0, 0.0, 0.0],
+            [0.0, 13.0, 0.0],
+            [0.0, 0.0, 13.0],
+        ]
+        cls.box = np.array(cell_rows).reshape(-1)
+
+    def test_attrs(self) -> None:
+        """Check that both models of the pair report the same metadata.
+
+        The partially compressed model must keep the type count, cutoff
+        radius, type map and fparam/aparam dimensions of the original.
+        """
+        # Same expectations for both models, original first.
+        for dp in (self.dp_original, self.dp_compressed):
+            self.assertEqual(dp.get_ntypes(), 2)
+            # rcut is a float, so it is compared to default_places decimals
+            self.assertAlmostEqual(dp.get_rcut(), 6.0, places=default_places)
+            self.assertEqual(dp.get_type_map(), ["O", "H"])
+            # both parameter dimensions are expected to be zero
+            self.assertEqual(dp.get_dim_fparam(), 0)
+            self.assertEqual(dp.get_dim_aparam(), 0)
+
+    def test_1frame(self) -> None:
+        """Test single frame evaluation with partial compression.
+
+        Shapes are checked first, then the values are compared to
+        ``default_places`` decimals.
+        """
+        # Positional arguments shared by both evaluations.
+        frame = (self.coords, self.box, self.atype)
+        e_ref, f_ref, v_ref = self.dp_original.eval(*frame, atomic=False)
+        e_cmp, f_cmp, v_cmp = self.dp_compressed.eval(*frame, atomic=False)
+        # check shape of the returns (same expectations for both models)
+        nframes, natoms = 1, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        outputs = (e_ref, f_ref, v_ref, e_cmp, f_cmp, v_cmp)
+        for out, shape in zip(outputs, shapes * 2):
+            self.assertEqual(out.shape, shape)
+        # check values - should be identical even with partial compression;
+        # f is compared first, then e, then v
+        pairs = ((f_ref, f_cmp), (e_ref, e_cmp), (v_ref, v_cmp))
+        for ref, cmp in pairs:
+            np.testing.assert_almost_equal(ref, cmp, default_places)
+
+    def test_1frame_atm(self) -> None:
+        """Test single frame atomic evaluation with partial compression.
+
+        With ``atomic=True`` each model returns five arrays.  Their shapes
+        are checked first, then the values to ``default_places`` decimals.
+        """
+        # Positional arguments shared by both evaluations.
+        frame = (self.coords, self.box, self.atype)
+        e_ref, f_ref, v_ref, ae_ref, av_ref = self.dp_original.eval(
+            *frame, atomic=True
+        )
+        e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp = self.dp_compressed.eval(
+            *frame, atomic=True
+        )
+        # check shape of the returns; expected shapes are listed in return
+        # order and apply to both models
+        nframes, natoms = 1, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        shapes += ((nframes, natoms, 1), (nframes, natoms, 9))
+        ref = (e_ref, f_ref, v_ref, ae_ref, av_ref)
+        cmp = (e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp)
+        for out, shape in zip(ref + cmp, shapes * 2):
+            self.assertEqual(out.shape, shape)
+        # check values - should be identical even with partial compression;
+        # indices select f, ae, av, e, v in that order
+        for idx in (1, 3, 4, 0, 2):
+            np.testing.assert_almost_equal(ref[idx], cmp[idx], default_places)
+
+    def test_2frame_atm(self) -> None:
+        """Test multi-frame atomic evaluation with partial compression.
+
+        The batch is the single test frame concatenated with itself, so
+        every output must have a leading dimension of two.
+        """
+        # Duplicate coordinates and box; atype is passed once for all frames.
+        coords_x2 = np.concatenate((self.coords, self.coords))
+        box_x2 = np.concatenate((self.box, self.box))
+        frames = (coords_x2, box_x2, self.atype)
+        e_ref, f_ref, v_ref, ae_ref, av_ref = self.dp_original.eval(
+            *frames, atomic=True
+        )
+        e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp = self.dp_compressed.eval(
+            *frames, atomic=True
+        )
+        # check shape of the returns; expected shapes are listed in return
+        # order and apply to both models
+        nframes, natoms = 2, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        shapes += ((nframes, natoms, 1), (nframes, natoms, 9))
+        ref = (e_ref, f_ref, v_ref, ae_ref, av_ref)
+        cmp = (e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp)
+        for out, shape in zip(ref + cmp, shapes * 2):
+            self.assertEqual(out.shape, shape)
+
+        # check values - should be identical even with partial compression;
+        # indices select f, ae, av, e, v in that order
+        for idx in (1, 3, 4, 0, 2):
+            np.testing.assert_almost_equal(ref[idx], cmp[idx], default_places)
+
+    def test_ase(self) -> None:
+        """Test ASE calculator integration with partial compression."""
+        from ase import (
+            Atoms,
+        )
+
+        from deepmd.calculator import (
+            DP,
+        )
+
+        def build(model_file):
+            # One ``Atoms`` object per model; only the calculator differs.
+            return Atoms(
+                "OHHOHH",
+                positions=self.coords.reshape((-1, 3)),
+                cell=self.box.reshape((3, 3)),
+                calculator=DP(model_file),
+            )
+
+        water_ref = build(FROZEN_MODEL_NONZERO_ATTN)
+        water_cmp = build(COMPRESSED_MODEL_NONZERO_ATTN)
+        e_ref = water_ref.get_potential_energy()
+        f_ref = water_ref.get_forces()
+        e_cmp = water_cmp.get_potential_energy()
+        f_cmp = water_cmp.get_forces()
+        # nframes = 1
+        # forces are compared before the energy
+        np.testing.assert_almost_equal(f_ref, f_cmp, default_places)
+        np.testing.assert_almost_equal(e_ref, e_cmp, default_places)
+
+
+if __name__ == "__main__":  # allow running this test module directly
+    unittest.main()
diff --git a/source/tests/pt/test_model_compression_se_e3_tebd.py b/source/tests/pt/test_model_compression_se_e3_tebd.py
new file mode 100644
index 0000000000..30976f6ce4
--- /dev/null
+++ b/source/tests/pt/test_model_compression_se_e3_tebd.py
@@ -0,0 +1,789 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import shutil
+import unittest
+
+import numpy as np
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.infer.deep_eval import (
+    DeepEval,
+)
+
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+# Number of decimal places used by the comparisons in this module:
+# 4 when GLOBAL_NP_FLOAT_PRECISION is np.float32, otherwise 9.
+# The value is fixed once, at import time.
+default_places = 4 if GLOBAL_NP_FLOAT_PRECISION == np.float32 else 9
+
+
+def _file_delete(file) -> None:
+    """Delete ``file`` if it is a directory tree or a regular file."""
+    is_dir = os.path.isdir(file)
+    if is_dir or os.path.isfile(file):
+        (shutil.rmtree if is_dir else os.remove)(file)
+
+
+def _init_models():
+    """Train, freeze and compress a ``se_e3_tebd`` model for this module.
+
+    Returns the training-input path, the frozen-model path and the
+    compressed-model path, in that order.
+    """
+    INPUT = str(tests_path / "input.json")
+    frozen_model = str(tests_path / "dp-original-se-e3-tebd.pth")
+    compressed_model = str(tests_path / "dp-compressed-se-e3-tebd.pth")
+    jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    # Configure se_e3_tebd descriptor
+    descriptor = {
+        "type": "se_e3_tebd",
+        "sel": 120,
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [2, 4, 8],
+        "tebd_dim": 8,
+        "tebd_input_mode": "strip",
+        "activation_function": "tanh",
+        "resnet_dt": False,
+    }
+    jdata["model"]["descriptor"] = descriptor
+    systems = str(tests_path / os.path.join("model_compression", "data"))
+    jdata["training"]["training_data"]["systems"] = systems
+    with open(INPUT, "w") as fp:
+        fp.write(json.dumps(jdata, indent=4))
+    # Run the three CLI stages in order; each must exit with status 0.
+    stages = (
+        (f"dp --pt train {INPUT}", "DP train failed!"),
+        (f"dp --pt freeze -o {frozen_model}", "DP freeze failed!"),
+        (
+            f"dp --pt compress  -i {frozen_model} -o {compressed_model} -t {INPUT}",
+            "DP model compression failed!",
+        ),
+    )
+    for command, failure in stages:
+        np.testing.assert_equal(run_dp(command), 0, failure)
+    return INPUT, frozen_model, compressed_model
+
+
+def _init_models_exclude_types():
+    """Train, freeze and compress a ``se_e3_tebd`` model with exclude_types.
+
+    Returns the training-input path, the frozen-model path and the
+    compressed-model path, in that order.
+    """
+    INPUT = str(tests_path / "input.json")
+    frozen_model = str(tests_path / "dp-original-se-e3-tebd-exclude-types.pth")
+    compressed_model = str(tests_path / "dp-compressed-se-e3-tebd-exclude-types.pth")
+    jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    # Configure se_e3_tebd descriptor with exclude_types
+    descriptor = {
+        "type": "se_e3_tebd",
+        "exclude_types": [[0, 1]],
+        "sel": 120,
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [2, 4, 8],
+        "tebd_dim": 8,
+        "tebd_input_mode": "strip",
+        "activation_function": "tanh",
+        "resnet_dt": False,
+    }
+    jdata["model"]["descriptor"] = descriptor
+    systems = str(tests_path / os.path.join("model_compression", "data"))
+    jdata["training"]["training_data"]["systems"] = systems
+    with open(INPUT, "w") as fp:
+        fp.write(json.dumps(jdata, indent=4))
+    # Run the three CLI stages in order; each must exit with status 0.
+    stages = (
+        (f"dp --pt train {INPUT}", "DP train failed!"),
+        (f"dp --pt freeze -o {frozen_model}", "DP freeze failed!"),
+        (
+            f"dp --pt compress  -i {frozen_model} -o {compressed_model} -t {INPUT}",
+            "DP model compression failed!",
+        ),
+    )
+    for command, failure in stages:
+        np.testing.assert_equal(run_dp(command), 0, failure)
+    return INPUT, frozen_model, compressed_model
+
+
+def _init_models_skip_neighbor_stat():
+    """Train with ``--skip-neighbor-stat``, then freeze and compress.
+
+    Returns the training-input path, the frozen-model path and the
+    compressed-model path, in that order.
+    """
+    tag = "-skip-neighbor-stat"
+    INPUT = str(tests_path / "input.json")
+    frozen_model = str(tests_path / f"dp-original-se-e3-tebd{tag}.pth")
+    compressed_model = str(tests_path / f"dp-compressed-se-e3-tebd{tag}.pth")
+    jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    # Configure se_e3_tebd descriptor
+    descriptor = {
+        "type": "se_e3_tebd",
+        "sel": 120,
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [2, 4, 8],
+        "tebd_dim": 8,
+        "tebd_input_mode": "strip",
+        "activation_function": "tanh",
+        "resnet_dt": False,
+    }
+    jdata["model"]["descriptor"] = descriptor
+    systems = str(tests_path / os.path.join("model_compression", "data"))
+    jdata["training"]["training_data"]["systems"] = systems
+    with open(INPUT, "w") as fp:
+        fp.write(json.dumps(jdata, indent=4))
+    # Run the three CLI stages in order; each must exit with status 0.
+    stages = (
+        (f"dp --pt train {INPUT} --skip-neighbor-stat", "DP train failed!"),
+        (f"dp --pt freeze -o {frozen_model}", "DP freeze failed!"),
+        (
+            f"dp --pt compress  -i {frozen_model} -o {compressed_model} -t {INPUT}",
+            "DP model compression failed!",
+        ),
+    )
+    for command, failure in stages:
+        np.testing.assert_equal(run_dp(command), 0, failure)
+    return INPUT, frozen_model, compressed_model
+
+
+def setUpModule() -> None:
+    """Build all model pairs once and publish their paths as module globals.
+
+    The three pipelines run sequentially; each writes its training input
+    to the same ``input.json`` path under ``tests_path``.
+    """
+    global INPUT, FROZEN_MODEL, COMPRESSED_MODEL
+    global INPUT_ET, FROZEN_MODEL_ET, COMPRESSED_MODEL_ET
+    global FROZEN_MODEL_SKIP_NEIGHBOR_STAT, COMPRESSED_MODEL_SKIP_NEIGHBOR_STAT
+    INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
+    # The input path returned by the skip-neighbor-stat run is not kept.
+    skip_stat = _init_models_skip_neighbor_stat()
+    _, FROZEN_MODEL_SKIP_NEIGHBOR_STAT, COMPRESSED_MODEL_SKIP_NEIGHBOR_STAT = skip_stat
+    # The exclude-types pipeline runs last.
+    INPUT_ET, FROZEN_MODEL_ET, COMPRESSED_MODEL_ET = _init_models_exclude_types()
+
+
+def tearDownModule() -> None:
+    """Delete everything the module-level pipelines left on disk.
+
+    Paths that no longer exist are skipped by ``_file_delete``, so the
+    shared ``input.json`` can safely appear twice in the list.
+    """
+    # Clean up files created by _init_models
+    doomed = [INPUT, FROZEN_MODEL, COMPRESSED_MODEL]
+    # Clean up files created by _init_models_skip_neighbor_stat
+    doomed += [FROZEN_MODEL_SKIP_NEIGHBOR_STAT, COMPRESSED_MODEL_SKIP_NEIGHBOR_STAT]
+    # Clean up files created by _init_models_exclude_types
+    doomed += [INPUT_ET, FROZEN_MODEL_ET, COMPRESSED_MODEL_ET]
+    # Clean up other artifacts; these are relative paths, so they are
+    # looked up in the current working directory
+    doomed += ["out.json", "input_v2_compat.json", "checkpoint", "lcurve.out"]
+    doomed += ["model.ckpt.pt", "model.ckpt-1.pt"]
+    doomed += ["model-compression/checkpoint", "model-compression"]
+    # Delete in the listed order: the checkpoint inside model-compression
+    # goes before the directory itself.
+    for path in doomed:
+        _file_delete(path)
+
+
+class TestDeepPotAPBC(unittest.TestCase):
+    """Compare the default model pair on a frame with a 13 x 13 x 13 box."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load the model pair and build the shared six-atom test frame.
+
+        Coordinates are stored flattened, one ``x, y, z`` triple per atom.
+        """
+        cls.dp_original = DeepEval(FROZEN_MODEL)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL)
+        xyz_rows = [
+            [12.83, 2.56, 2.18],
+            [12.09, 2.87, 2.74],
+            [0.25, 3.32, 1.68],
+            [3.36, 3.00, 1.81],
+            [3.51, 2.51, 2.60],
+            [4.27, 3.22, 1.56],
+        ]
+        # Flatten to the 18-element vector the tests pass to ``eval``.
+        cls.coords = np.array(xyz_rows).reshape(-1)
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        # Nine-element box vector, written here as three rows of three.
+        cell_rows = [
+            [13.0, 0.0, 0.0],
+            [0.0, 13.0, 0.0],
+            [0.0, 0.0, 13.0],
+        ]
+        cls.box = np.array(cell_rows).reshape(-1)
+
+    def test_attrs(self) -> None:
+        """Check the metadata reported by the original and compressed models.
+
+        Both models must report the same type count, cutoff radius, type
+        map and fparam/aparam dimensions.
+        """
+        # Same expectations for both models, original first.
+        for dp in (self.dp_original, self.dp_compressed):
+            self.assertEqual(dp.get_ntypes(), 2)
+            # rcut is a float, so it is compared to default_places decimals
+            self.assertAlmostEqual(dp.get_rcut(), 4.0, places=default_places)
+            self.assertEqual(dp.get_type_map(), ["O", "H"])
+            self.assertEqual(dp.get_dim_fparam(), 0)
+            self.assertEqual(dp.get_dim_aparam(), 0)
+
+    def test_1frame(self) -> None:
+        """Compare the non-atomic outputs of both models on a single frame.
+
+        Shapes are checked first, then the values are compared to
+        ``default_places`` decimals.
+        """
+        # Positional arguments shared by both evaluations.
+        frame = (self.coords, self.box, self.atype)
+        e_ref, f_ref, v_ref = self.dp_original.eval(*frame, atomic=False)
+        e_cmp, f_cmp, v_cmp = self.dp_compressed.eval(*frame, atomic=False)
+        # check shape of the returns (same expectations for both models)
+        nframes, natoms = 1, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        outputs = (e_ref, f_ref, v_ref, e_cmp, f_cmp, v_cmp)
+        for out, shape in zip(outputs, shapes * 2):
+            self.assertEqual(out.shape, shape)
+        # check values: f first, then e, then v
+        pairs = ((f_ref, f_cmp), (e_ref, e_cmp), (v_ref, v_cmp))
+        for ref, cmp in pairs:
+            np.testing.assert_almost_equal(ref, cmp, default_places)
+
+    def test_1frame_atm(self) -> None:
+        """Compare the ``atomic=True`` outputs of both models on one frame.
+
+        With ``atomic=True`` each model returns five arrays.  Their shapes
+        are checked first, then the values to ``default_places`` decimals.
+        """
+        # Positional arguments shared by both evaluations.
+        frame = (self.coords, self.box, self.atype)
+        e_ref, f_ref, v_ref, ae_ref, av_ref = self.dp_original.eval(
+            *frame, atomic=True
+        )
+        e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp = self.dp_compressed.eval(
+            *frame, atomic=True
+        )
+        # check shape of the returns; expected shapes are listed in return
+        # order and apply to both models
+        nframes, natoms = 1, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        shapes += ((nframes, natoms, 1), (nframes, natoms, 9))
+        ref = (e_ref, f_ref, v_ref, ae_ref, av_ref)
+        cmp = (e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp)
+        for out, shape in zip(ref + cmp, shapes * 2):
+            self.assertEqual(out.shape, shape)
+        # check values; indices select f, ae, av, e, v in that order
+        for idx in (1, 3, 4, 0, 2):
+            np.testing.assert_almost_equal(ref[idx], cmp[idx], default_places)
+
+    def test_2frame_atm(self) -> None:
+        """Compare the ``atomic=True`` outputs on a two-frame batch.
+
+        The batch is the single test frame concatenated with itself, so
+        every output must have a leading dimension of two.
+        """
+        # Duplicate coordinates and box; atype is passed once for all frames.
+        coords_x2 = np.concatenate((self.coords, self.coords))
+        box_x2 = np.concatenate((self.box, self.box))
+        frames = (coords_x2, box_x2, self.atype)
+        e_ref, f_ref, v_ref, ae_ref, av_ref = self.dp_original.eval(
+            *frames, atomic=True
+        )
+        e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp = self.dp_compressed.eval(
+            *frames, atomic=True
+        )
+        # check shape of the returns; expected shapes are listed in return
+        # order and apply to both models
+        nframes, natoms = 2, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        shapes += ((nframes, natoms, 1), (nframes, natoms, 9))
+        ref = (e_ref, f_ref, v_ref, ae_ref, av_ref)
+        cmp = (e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp)
+        for out, shape in zip(ref + cmp, shapes * 2):
+            self.assertEqual(out.shape, shape)
+
+        # check values; indices select f, ae, av, e, v in that order
+        for idx in (1, 3, 4, 0, 2):
+            np.testing.assert_almost_equal(ref[idx], cmp[idx], default_places)
+
+
+class TestDeepPotANoPBC(unittest.TestCase):
+    """Compare the default model pair on a frame evaluated without a box."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load the model pair and build the shared six-atom test frame.
+
+        Coordinates are stored flattened, one ``x, y, z`` triple per atom.
+        ``cls.box`` is ``None``, so the tests of this class call ``eval``
+        without a box.
+        """
+        # Reference (frozen) model and its compressed counterpart.
+        cls.dp_original = DeepEval(FROZEN_MODEL)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL)
+        # Atom positions, one row per atom.
+        xyz_rows = [
+            [12.83, 2.56, 2.18],
+            [12.09, 2.87, 2.74],
+            [0.25, 3.32, 1.68],
+            [3.36, 3.00, 1.81],
+            [3.51, 2.51, 2.60],
+            [4.27, 3.22, 1.56],
+        ]
+        # Flatten to the 18-element vector the tests pass to ``eval``.
+        cls.coords = np.array(xyz_rows).reshape(-1)
+        # Per-atom type indices for the six atoms.
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        # No box is supplied for this class.
+        cls.box = None
+
+    def test_1frame(self) -> None:
+        """Compare the non-atomic outputs of both models on a single frame.
+
+        Shapes are checked first, then the values are compared to
+        ``default_places`` decimals.
+        """
+        # Positional arguments shared by both evaluations.
+        frame = (self.coords, self.box, self.atype)
+        e_ref, f_ref, v_ref = self.dp_original.eval(*frame, atomic=False)
+        e_cmp, f_cmp, v_cmp = self.dp_compressed.eval(*frame, atomic=False)
+        # check shape of the returns (same expectations for both models)
+        nframes, natoms = 1, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        outputs = (e_ref, f_ref, v_ref, e_cmp, f_cmp, v_cmp)
+        for out, shape in zip(outputs, shapes * 2):
+            self.assertEqual(out.shape, shape)
+        # check values: f first, then e, then v
+        pairs = ((f_ref, f_cmp), (e_ref, e_cmp), (v_ref, v_cmp))
+        for ref, cmp in pairs:
+            np.testing.assert_almost_equal(ref, cmp, default_places)
+
+    def test_1frame_atm(self) -> None:
+        """Compare the ``atomic=True`` outputs of both models on one frame.
+
+        With ``atomic=True`` each model returns five arrays.  Their shapes
+        are checked first, then the values to ``default_places`` decimals.
+        """
+        # Positional arguments shared by both evaluations.
+        frame = (self.coords, self.box, self.atype)
+        e_ref, f_ref, v_ref, ae_ref, av_ref = self.dp_original.eval(
+            *frame, atomic=True
+        )
+        e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp = self.dp_compressed.eval(
+            *frame, atomic=True
+        )
+        # check shape of the returns; expected shapes are listed in return
+        # order and apply to both models
+        nframes, natoms = 1, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        shapes += ((nframes, natoms, 1), (nframes, natoms, 9))
+        ref = (e_ref, f_ref, v_ref, ae_ref, av_ref)
+        cmp = (e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp)
+        for out, shape in zip(ref + cmp, shapes * 2):
+            self.assertEqual(out.shape, shape)
+        # check values; indices select f, ae, av, e, v in that order
+        for idx in (1, 3, 4, 0, 2):
+            np.testing.assert_almost_equal(ref[idx], cmp[idx], default_places)
+
+    def test_2frame_atm(self) -> None:
+        """Compare the ``atomic=True`` outputs on a two-frame batch.
+
+        The coordinates are the single test frame concatenated with
+        itself; ``self.box`` (``None`` here) is passed through unchanged.
+        """
+        # Duplicate the coordinates; atype is passed once for all frames.
+        coords_x2 = np.concatenate((self.coords, self.coords))
+        frames = (coords_x2, self.box, self.atype)
+        e_ref, f_ref, v_ref, ae_ref, av_ref = self.dp_original.eval(
+            *frames, atomic=True
+        )
+        e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp = self.dp_compressed.eval(
+            *frames, atomic=True
+        )
+        # check shape of the returns; expected shapes are listed in return
+        # order and apply to both models
+        nframes, natoms = 2, len(self.atype)
+        shapes = ((nframes, 1), (nframes, natoms, 3), (nframes, 9))
+        shapes += ((nframes, natoms, 1), (nframes, natoms, 9))
+        ref = (e_ref, f_ref, v_ref, ae_ref, av_ref)
+        cmp = (e_cmp, f_cmp, v_cmp, ae_cmp, av_cmp)
+        for out, shape in zip(ref + cmp, shapes * 2):
+            self.assertEqual(out.shape, shape)
+
+        # check values; indices select f, ae, av, e, v in that order
+        for idx in (1, 3, 4, 0, 2):
+            np.testing.assert_almost_equal(ref[idx], cmp[idx], default_places)
+
+
+class TestDeepPotALargeBoxNoPBC(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.dp_original = DeepEval(FROZEN_MODEL)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL)
+        cls.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
+        cls.atype = [0, 1, 1, 0, 1, 1]
+        cls.box = np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+
+    def test_1frame(self) -> None:
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        # check shape of the returns
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        # check values
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+    def test_1frame_atm(self) -> None:
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )  # atomic=True: five outputs, the last two are per-atom arrays
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )  # same inputs evaluated with the compressed model
+        # check the shapes of all five outputs returned by each model
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
+        # check that both models agree to default_places decimal places
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+    def test_ase(self) -> None:
+        from ase import (  # imported inside the test, not at module level
+            Atoms,
+        )
+
+        from deepmd.calculator import (
+            DP,
+        )
+
+        water0 = Atoms(  # structure evaluated with the FROZEN_MODEL calculator
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP(FROZEN_MODEL),
+        )
+        water1 = Atoms(  # identical structure, COMPRESSED_MODEL calculator
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP(COMPRESSED_MODEL),
+        )
+        ee0 = water0.get_potential_energy()
+        ff0 = water0.get_forces()
+        ee1 = water1.get_potential_energy()
+        ff1 = water1.get_forces()
+        # a single structure (nframes = 1): compare forces, then energy
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+
+
+class TestDeepPotAPBCExcludeTypes(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:  # runs once: load both models, build one frame
+        cls.dp_original = DeepEval(FROZEN_MODEL_ET)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL_ET)
+        cls.coords = np.array(  # flat coordinate list: 18 values for 6 atoms
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
+        cls.atype = [0, 1, 1, 0, 1, 1]  # per-atom types; presumably 0=O, 1=H
+        cls.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+
+    def test_attrs(self) -> None:  # both models must report identical metadata
+        self.assertEqual(self.dp_original.get_ntypes(), 2)
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 4.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
+        self.assertEqual(self.dp_original.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_original.get_dim_aparam(), 0)
+
+        self.assertEqual(self.dp_compressed.get_ntypes(), 2)
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 4.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
+        self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
+
+    def test_1frame(self) -> None:
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )  # reference outputs from the original model
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )  # same inputs evaluated with the compressed model
+        # check the shapes of the three outputs returned by each model
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        # check that both models agree to default_places decimal places
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+    def test_1frame_atm(self) -> None:
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )  # atomic=True: five outputs, the last two are per-atom arrays
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )  # same inputs evaluated with the compressed model
+        # check the shapes of all five outputs returned by each model
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
+        # check that both models agree to default_places decimal places
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+    def test_2frame_atm(self) -> None:
+        coords2 = np.concatenate((self.coords, self.coords))  # same frame twice
+        box2 = np.concatenate((self.box, self.box))  # one box per frame
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        # check that every output now carries a leading dimension of 2 frames
+        nframes = 2
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
+
+        # check that both models agree to default_places decimal places
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+
+class TestSkipNeighborStat(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:  # runs once: load both models, build one frame
+        cls.dp_original = DeepEval(FROZEN_MODEL_SKIP_NEIGHBOR_STAT)
+        cls.dp_compressed = DeepEval(COMPRESSED_MODEL_SKIP_NEIGHBOR_STAT)
+        cls.coords = np.array(  # flat coordinate list: 18 values for 6 atoms
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
+        cls.atype = [0, 1, 1, 0, 1, 1]  # per-atom types; presumably 0=O, 1=H
+        cls.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+
+    def test_attrs(self) -> None:  # both models must report identical metadata
+        self.assertEqual(self.dp_original.get_ntypes(), 2)
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 4.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
+        self.assertEqual(self.dp_original.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_original.get_dim_aparam(), 0)
+
+        self.assertEqual(self.dp_compressed.get_ntypes(), 2)
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 4.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
+        self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
+
+    def test_1frame(self) -> None:
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )  # reference outputs from the original model
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )  # same inputs evaluated with the compressed model
+        # check the shapes of the three outputs returned by each model
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        # check that both models agree to default_places decimal places
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+    def test_1frame_atm(self) -> None:
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )  # atomic=True: five outputs, the last two are per-atom arrays
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )  # same inputs evaluated with the compressed model
+        # check the shapes of all five outputs returned by each model
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
+        # check that both models agree to default_places decimal places
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+    def test_2frame_atm(self) -> None:
+        coords2 = np.concatenate((self.coords, self.coords))  # same frame twice
+        box2 = np.concatenate((self.box, self.box))  # one box per frame
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        # check that every output now carries a leading dimension of 2 frames
+        nframes = 2
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
+
+        # check that both models agree to default_places decimal places
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/source/tests/pt/test_tabulate.py b/source/tests/pt/test_tabulate.py
index 164819408f..21083f7bdd 100644
--- a/source/tests/pt/test_tabulate.py
+++ b/source/tests/pt/test_tabulate.py
@@ -4,6 +4,9 @@
 import numpy as np
 import torch
 
+from deepmd.dpmodel.utils.network import (
+    get_activation_fn,
+)
 from deepmd.pt.utils import (
     env,
 )
@@ -18,6 +21,24 @@
     tf,
 )
 
+ACTIVATION_NAMES = {  # integer functype code -> activation name
+    1: "tanh",
+    2: "gelu",
+    3: "relu",
+    4: "relu6",
+    5: "softplus",
+    6: "sigmoid",
+    7: "silu",
+}
+
+
+def get_activation_function(functype: int):
+    """Resolve an integer functype code to its activation callable."""
+    activation_name = ACTIVATION_NAMES.get(functype)
+    if activation_name is None:
+        raise ValueError(f"Unknown functype: {functype}")
+    return get_activation_fn(activation_name)
+
 
 def setUpModule() -> None:
     tf.compat.v1.enable_eager_execution()
@@ -43,92 +64,129 @@ def setUp(self) -> None:
 
         self.xbar = np.matmul(self.x, self.w) + self.b  # 4 x 4
 
-        self.y = np.tanh(self.xbar)
-
     def test_ops(self) -> None:
+        """Run the tabulation op checks once per entry in ACTIVATION_NAMES."""
+        # subTest labels each failure with the activation name and functype.
+        for functype, activation_name in ACTIVATION_NAMES.items():
+            activation_fn = get_activation_function(functype)
+
+            with self.subTest(activation=activation_name, functype=functype):
+                self._test_single_activation(functype, activation_fn, activation_name)
+
+    def _test_single_activation(
+        self, functype: int, activation_fn, activation_name: str
+    ) -> None:
+        """Compare the PT tabulate ops against the TF ops for one activation."""
+        # Activation output y for the pre-activation input self.xbar
+        y = activation_fn(self.xbar)
+
+        # unaggregated_dy_dx_s: TF op vs. PT implementation on the same inputs
         dy_tf = op_module.unaggregated_dy_dx_s(
-            tf.constant(self.y, dtype="double"),
+            tf.constant(y, dtype="double"),
             tf.constant(self.w, dtype="double"),
             tf.constant(self.xbar, dtype="double"),
-            tf.constant(1),
+            tf.constant(functype),
         )
 
         dy_pt = unaggregated_dy_dx_s(
-            torch.from_numpy(self.y),
+            torch.from_numpy(y),
             self.w,
             torch.from_numpy(self.xbar),
-            1,
+            functype,
         )
 
         dy_tf_numpy = dy_tf.numpy()
         dy_pt_numpy = dy_pt.detach().cpu().numpy()
 
-        np.testing.assert_almost_equal(dy_tf_numpy, dy_pt_numpy, decimal=10)
+        np.testing.assert_almost_equal(
+            dy_tf_numpy,
+            dy_pt_numpy,
+            decimal=10,
+            err_msg=f"unaggregated_dy_dx_s failed for {activation_name}",
+        )
 
+        # unaggregated_dy2_dx_s: each backend is fed its own dy from the step above
         dy2_tf = op_module.unaggregated_dy2_dx_s(
-            tf.constant(self.y, dtype="double"),
+            tf.constant(y, dtype="double"),
             dy_tf,
             tf.constant(self.w, dtype="double"),
             tf.constant(self.xbar, dtype="double"),
-            tf.constant(1),
+            tf.constant(functype),
         )
 
         dy2_pt = unaggregated_dy2_dx_s(
-            torch.from_numpy(self.y),
+            torch.from_numpy(y),
             dy_pt,
             self.w,
             torch.from_numpy(self.xbar),
-            1,
+            functype,
         )
 
         dy2_tf_numpy = dy2_tf.numpy()
         dy2_pt_numpy = dy2_pt.detach().cpu().numpy()
 
-        np.testing.assert_almost_equal(dy2_tf_numpy, dy2_pt_numpy, decimal=10)
+        np.testing.assert_almost_equal(
+            dy2_tf_numpy,
+            dy2_pt_numpy,
+            decimal=10,
+            err_msg=f"unaggregated_dy2_dx_s failed for {activation_name}",
+        )
 
+        # unaggregated_dy_dx: PT tensors are moved to env.DEVICE for this op
         dz_tf = op_module.unaggregated_dy_dx(
-            tf.constant(self.y, dtype="double"),
+            tf.constant(y, dtype="double"),
             tf.constant(self.w, dtype="double"),
             dy_tf,
             tf.constant(self.xbar, dtype="double"),
-            tf.constant(1),
+            tf.constant(functype),
         )
 
         dz_pt = unaggregated_dy_dx(
-            torch.from_numpy(self.y).to(env.DEVICE),
+            torch.from_numpy(y).to(env.DEVICE),
             self.w,
             dy_pt,
             torch.from_numpy(self.xbar).to(env.DEVICE),
-            1,
+            functype,
         )
 
         dz_tf_numpy = dz_tf.numpy()
         dz_pt_numpy = dz_pt.detach().cpu().numpy()
 
-        np.testing.assert_almost_equal(dz_tf_numpy, dz_pt_numpy, decimal=10)
+        np.testing.assert_almost_equal(
+            dz_tf_numpy,
+            dz_pt_numpy,
+            decimal=10,
+            err_msg=f"unaggregated_dy_dx failed for {activation_name}",
+        )
 
+        # unaggregated_dy2_dx: takes the dy and dy2 results above; rebinds dy2_*
         dy2_tf = op_module.unaggregated_dy2_dx(
-            tf.constant(self.y, dtype="double"),
+            tf.constant(y, dtype="double"),
             tf.constant(self.w, dtype="double"),
             dy_tf,
             dy2_tf,
             tf.constant(self.xbar, dtype="double"),
-            tf.constant(1),
+            tf.constant(functype),
         )
 
         dy2_pt = unaggregated_dy2_dx(
-            torch.from_numpy(self.y).to(env.DEVICE),
+            torch.from_numpy(y).to(env.DEVICE),
             self.w,
             dy_pt,
             dy2_pt,
             torch.from_numpy(self.xbar).to(env.DEVICE),
-            1,
+            functype,
         )
 
         dy2_tf_numpy = dy2_tf.numpy()
         dy2_pt_numpy = dy2_pt.detach().cpu().numpy()
 
-        np.testing.assert_almost_equal(dy2_tf_numpy, dy2_pt_numpy, decimal=10)
+        np.testing.assert_almost_equal(
+            dy2_tf_numpy,
+            dy2_pt_numpy,
+            decimal=10,
+            err_msg=f"unaggregated_dy2_dx failed for {activation_name}",
+        )
 
 
 if __name__ == "__main__":
diff --git a/source/tests/pt/test_tabulate_fusion_se_t_tebd.py b/source/tests/pt/test_tabulate_fusion_se_t_tebd.py
new file mode 100644
index 0000000000..10bf48c46f
--- /dev/null
+++ b/source/tests/pt/test_tabulate_fusion_se_t_tebd.py
@@ -0,0 +1,1918 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import torch
+
+from deepmd.pt.cxx_op import (
+    ENABLE_CUSTOMIZED_OP,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from ..consistent.common import (
+    parameterized,
+)
+
+
+@parameterized((torch.float64, torch.float32))
+@unittest.skipIf(not ENABLE_CUSTOMIZED_OP, "PyTorch customized OPs are not built")
+class TestTabulateFusionSeTTebdOp(unittest.TestCase):
+    def setUp(self) -> None:
+        (dtype,) = self.param
+        if dtype == torch.float64:
+            self.prec = 1e-10
+        elif dtype == torch.float32:
+            # JZ: not sure the reason, but 1e-5 cannot pass the grad test
+            self.prec = 1e-3
+        self.table_tensor = torch.tensor(
+            [
+                -1.0600000163027882e02,
+                7.7059358807135015e02,
+                -5.6954714749735385e03,
+                1.2167808756610991e03,
+                -7.6199102434332218e01,
+                1.0706136029373441e00,
+                -1.0600000164528124e02,
+                7.7059358630452323e02,
+                -5.6954715659539552e03,
+                1.2167808757436076e03,
+                -7.6199099707724926e01,
+                1.0706134206080884e00,
+                -1.0600000163027882e02,
+                7.7059358807135015e02,
+                -5.6954714749735385e03,
+                1.2167808756610991e03,
+                -7.6199102434332218e01,
+                1.0706136029373441e00,
+                -1.0600000164528124e02,
+                7.7059358630452323e02,
+                -5.6954715659539552e03,
+                1.2167808757436076e03,
+                -7.6199099707724926e01,
+                1.0706134206080884e00,
+                -9.6000006759336443e01,
+                6.2969719646863621e02,
+                -4.2053706363664551e03,
+                9.0372155784831205e02,
+                -5.7600014239472898e01,
+                8.6528676197113796e-01,
+                -9.6000006828502180e01,
+                6.2969718981238339e02,
+                -4.2053709121998018e03,
+                9.0372156236848912e02,
+                -5.7600006817493266e01,
+                8.6528625106787871e-01,
+                -9.6000006759336443e01,
+                6.2969719646863621e02,
+                -4.2053706363664551e03,
+                9.0372155784831205e02,
+                -5.7600014239472898e01,
+                8.6528676197113796e-01,
+                -9.6000006828502180e01,
+                6.2969718981238339e02,
+                -4.2053709121998018e03,
+                9.0372156236848912e02,
+                -5.7600006817493266e01,
+                8.6528625106787871e-01,
+                -8.6000028021606425e01,
+                5.0303296429845562e02,
+                -3.0008648248894533e03,
+                6.4939597734382562e02,
+                -4.2250984019314707e01,
+                6.8180015607155764e-01,
+                -8.6000028340480625e01,
+                5.0303293978396903e02,
+                -3.0008656209622986e03,
+                6.4939600529391078e02,
+                -4.2250965541906716e01,
+                6.8179882734268982e-01,
+                -8.6000028021606425e01,
+                5.0303296429845562e02,
+                -3.0008648248894533e03,
+                6.4939597734382562e02,
+                -4.2250984019314707e01,
+                6.8180015607155764e-01,
+                -8.6000028340480625e01,
+                5.0303293978396903e02,
+                -3.0008656209622986e03,
+                6.4939600529353049e02,
+                -4.2250965541830588e01,
+                6.8179882733888086e-01,
+                -7.6000116148038558e01,
+                3.9060139597613619e02,
+                -2.0515743554479322e03,
+                4.4772754091167945e02,
+                -2.9848087537832814e01,
+                5.2014755686537917e-01,
+                -7.6000117618125429e01,
+                3.9060130821883052e02,
+                -2.0515765138621105e03,
+                4.4772766653712006e02,
+                -2.9848047259266409e01,
+                5.2014443989116910e-01,
+                -7.6000116148038558e01,
+                3.9060139597613619e02,
+                -2.0515743554479322e03,
+                4.4772754091167945e02,
+                -2.9848087537832814e01,
+                5.2014755686537917e-01,
+                -7.6000117618125742e01,
+                3.9060130821877993e02,
+                -2.0515765138659344e03,
+                4.4772766652483722e02,
+                -2.9848047256692499e01,
+                5.2014443976043645e-01,
+                -6.6000481290731443e01,
+                2.9240425245900917e02,
+                -1.3271250821434478e03,
+                2.9263955624337893e02,
+                -2.0087224005740719e01,
+                3.8031147992206349e-01,
+                -6.6000488067863742e01,
+                2.9240394960550276e02,
+                -1.3271304743966571e03,
+                2.9264002765325057e02,
+                -2.0087154325946980e01,
+                3.8030522013794582e-01,
+                -6.6000481290731443e01,
+                2.9240425245900917e02,
+                -1.3271250821434478e03,
+                2.9263955624337893e02,
+                -2.0087224005740719e01,
+                3.8031147992206349e-01,
+                -6.6000488067883694e01,
+                2.9240394960308691e02,
+                -1.3271304745319526e03,
+                2.9264002727267626e02,
+                -2.0087154245656002e01,
+                3.8030521605011575e-01,
+                -5.6001992867343972e01,
+                2.0844745574402617e02,
+                -7.9715799906587699e02,
+                1.7805563184427194e02,
+                -1.2663929104029080e01,
+                2.6224978307822894e-01,
+                -5.6002024103130161e01,
+                2.0844646075692629e02,
+                -7.9717003898786652e02,
+                1.7805715054974732e02,
+                -1.2663864677938077e01,
+                2.6224029170957303e-01,
+                -5.6001992867343972e01,
+                2.0844745574402617e02,
+                -7.9715799906587699e02,
+                1.7805563184427194e02,
+                -1.2663929104029080e01,
+                2.6224978307822894e-01,
+                -5.6002024104383771e01,
+                2.0844646064871867e02,
+                -7.9717004324410516e02,
+                1.7805714044473001e02,
+                -1.2663862524337585e01,
+                2.6224018166598279e-01,
+                -4.6008230210744550e01,
+                1.3874976550319553e02,
+                -4.3134867537287749e02,
+                9.7902623595157010e01,
+                -7.2734403121911884e00,
+                1.6589123996688057e-01,
+                -4.6008373996710617e01,
+                1.3874671965012058e02,
+                -4.3137141216256458e02,
+                9.7906861443792735e01,
+                -7.2735856084076280e00,
+                1.6588642735924275e-01,
+                -4.6008230210744550e01,
+                1.3874976550319553e02,
+                -4.3134867537287749e02,
+                9.7902623595157010e01,
+                -7.2734403121911884e00,
+                1.6589123996688057e-01,
+                -4.6008374075307870e01,
+                1.3874671513440606e02,
+                -4.3137152784492957e02,
+                9.7906652364871050e01,
+                -7.2735401377994249e00,
+                1.6588408717348646e-01,
+                -3.6033642533368131e01,
+                8.3364086172019398e01,
+                -1.9942175516407502e02,
+                4.6124022747838069e01,
+                -3.6130563858549958e00,
+                9.1249773312287188e-02,
+                -3.6034298111245583e01,
+                8.3355843868269616e01,
+                -1.9945266030093268e02,
+                4.6135000705962462e01,
+                -3.6142786797647353e00,
+                9.1293932043118198e-02,
+                -3.6033642533368131e01,
+                8.3364086172019398e01,
+                -1.9942175516407502e02,
+                4.6124022747838069e01,
+                -3.6130563858549958e00,
+                9.1249773312287188e-02,
+                -3.6034302998781108e01,
+                8.3355675173745269e01,
+                -1.9945516784358935e02,
+                4.6132303200740992e01,
+                -3.6136582565667807e00,
+                9.1261386291659793e-02,
+                -2.6132076703837274e01,
+                4.2398929436319683e01,
+                -7.1037171119057973e01,
+                1.3425662262407457e01,
+                -7.5172495708992593e-01,
+                7.7522572203268742e-03,
+                -2.6134776894873077e01,
+                4.2384732735328775e01,
+                -7.1030526549717337e01,
+                1.3431455085299461e01,
+                -7.5302028721199155e-01,
+                7.8186246126207160e-03,
+                -2.6132076703837274e01,
+                4.2398929436319683e01,
+                -7.1037171119057973e01,
+                1.3425662262405055e01,
+                -7.5172495708944420e-01,
+                7.7522572203027138e-03,
+                -2.6135071381093578e01,
+                4.2379566840123424e01,
+                -7.1067162844830236e01,
+                1.3434603316099608e01,
+                -7.5251233833488806e-01,
+                7.7734884077347950e-03,
+                -2.2221480705551805e01,
+                3.0067218434037404e01,
+                -4.1779705297521097e01,
+                -1.9077757705724110e02,
+                3.6413466026808294e02,
+                -1.6067397401486718e02,
+                -2.2225430071703467e01,
+                3.0060809113889512e01,
+                -4.1712800191721314e01,
+                -1.9084786311022177e02,
+                3.6410062714257685e02,
+                -1.6063028238785057e02,
+                -2.2221480705551830e01,
+                3.0067218434036263e01,
+                -4.1779705297545611e01,
+                -1.9077757705723738e02,
+                3.6413466026815809e02,
+                -1.6067397401492047e02,
+                -2.2226913938674084e01,
+                3.0042371820589185e01,
+                -4.1801582285426832e01,
+                -1.9048619249019526e02,
+                3.6373874557858261e02,
+                -1.6052358406417352e02,
+                -2.1250858373060836e01,
+                2.7343847665267702e01,
+                -3.6044215009418814e01,
+                -1.7618484800469861e02,
+                3.3120085405644409e02,
+                -1.4534825256321494e02,
+                -2.1254939505030809e01,
+                2.7342716030835884e01,
+                -3.5955450545431681e01,
+                -1.7635550119316844e02,
+                3.3127447930769307e02,
+                -1.4533876561022046e02,
+                -2.1250858373060954e01,
+                2.7343847665262818e01,
+                -3.6044215009514119e01,
+                -1.7618484800464822e02,
+                3.3120085405666612e02,
+                -1.4534825256338749e02,
+                -2.1257155379297881e01,
+                2.7317691772612619e01,
+                -3.6063526926252166e01,
+                -1.7588696592837897e02,
+                3.3079005662384850e02,
+                -1.4519086534447842e02,
+                -2.0283472228681301e01,
+                2.4763027042036295e01,
+                -3.0876160316998963e01,
+                -1.6184864900381874e02,
+                2.9976970905591691e02,
+                -1.3084395423768876e02,
+                -2.0287461515322455e01,
+                2.4769400540137131e01,
+                -3.0762734380983186e01,
+                -1.6214886052089241e02,
+                2.9998995088792128e02,
+                -1.3088331758129965e02,
+                -2.0283472228681809e01,
+                2.4763027042017129e01,
+                -3.0876160317336627e01,
+                -1.6184864900359682e02,
+                2.9976970905662938e02,
+                -1.3084395423826805e02,
+                -2.0290765181946348e01,
+                2.4735639907973120e01,
+                -3.0892738413082597e01,
+                -1.6154574482310053e02,
+                2.9934595420013272e02,
+                -1.3068028494926122e02,
+                -1.9319499689234629e01,
+                2.2323824431805683e01,
+                -2.6243395369841849e01,
+                -1.4782286378121026e02,
+                2.6985759662396487e02,
+                -1.1715474197881395e02,
+                -1.9323022570439292e01,
+                2.2340565860680357e01,
+                -2.6102786429129356e01,
+                -1.4828764857305418e02,
+                2.7027298759214750e02,
+                -1.1726163007473576e02,
+                -1.9319499689236839e01,
+                2.2323824431730525e01,
+                -2.6243395371031539e01,
+                -1.4782286378021576e02,
+                2.6985759662609979e02,
+                -1.1715474198068593e02,
+                -1.9327939259284843e01,
+                2.2295320666731183e01,
+                -2.6257097174199931e01,
+                -1.4751677383623073e02,
+                2.6942341041084092e02,
+                -1.1698575776762208e02,
+                -1.8359079763330211e01,
+                2.0025118950280675e01,
+                -2.2113826757823226e01,
+                -1.3415932552431914e02,
+                2.4147795894487624e02,
+                -1.0427314537549884e02,
+                -1.8361534194530734e01,
+                2.0055847278170305e01,
+                -2.1944107342764479e01,
+                -1.3482982214648752e02,
+                2.4214772485703989e02,
+                -1.0447085300268679e02,
+                -1.8359079763339750e01,
+                2.0025118949989704e01,
+                -2.2113826761939308e01,
+                -1.3415932552009582e02,
+                2.4147795895089951e02,
+                -1.0427314538136979e02,
+                -1.8368836959765495e01,
+                1.9995657614892380e01,
+                -2.2124533894067383e01,
+                -1.3385233293246981e02,
+                2.4103659293914149e02,
+                -1.0410011400771683e02,
+                -1.7402299525814517e01,
+                1.7865597763687486e01,
+                -1.8455503416511757e01,
+                -1.2090765118569301e02,
+                2.1464125749038132e02,
+                -9.2190581022134992e01,
+                -1.7402744551259310e01,
+                1.7914800567904472e01,
+                -1.8255754666855470e01,
+                -1.2183089355280822e02,
+                2.1563582256173194e02,
+                -9.2507405324257306e01,
+                -1.7402299525855486e01,
+                1.7865597762572605e01,
+                -1.8455503430527756e01,
+                -1.2090765116826699e02,
+                2.1464125750558804e02,
+                -9.2190581039770791e01,
+                -1.7413567239985614e01,
+                1.7835392747330133e01,
+                -1.8463115133795956e01,
+                -1.2060260469703572e02,
+                2.1419685510959093e02,
+                -9.2015134441585104e01,
+                -1.6449179896085464e01,
+                1.5843762224435309e01,
+                -1.5236722252652665e01,
+                -1.0811515163854509e02,
+                1.8935506712501905e02,
+                -8.0897437157402223e01,
+                -1.6446174965543889e01,
+                1.5916874201410112e01,
+                -1.5007553197461570e01,
+                -1.0934291295595986e02,
+                1.9075532567542470e02,
+                -8.1366596347119696e01,
+                -1.6449179896260411e01,
+                1.5843762220214204e01,
+                -1.5236722299508587e01,
+                -1.0811515156878269e02,
+                1.8935506715588940e02,
+                -8.0897437207525684e01,
+                -1.6462173655481337e01,
+                1.5813096619069219e01,
+                -1.5241142983208677e01,
+                -1.0781563484017332e02,
+                1.8891289499393798e02,
+                -8.0721658713418606e01,
+                -1.5499661595231082e01,
+                1.3957945516559789e01,
+                -1.2426145992195885e01,
+                -9.5826844741964834e01,
+                1.6562434781973772e02,
+                -7.0383233416004117e01,
+                -1.5491037589250178e01,
+                1.4061349904707843e01,
+                -1.2170301483989650e01,
+                -9.7412966929875139e01,
+                1.6751874597575440e02,
+                -7.1041920384880939e01,
+                -1.5499661595973759e01,
+                1.3957945500778198e01,
+                -1.2426146145776961e01,
+                -9.5826844470313858e01,
+                1.6562434784656404e02,
+                -7.0383233547510557e01,
+                -1.5514618579274794e01,
+                1.3927192540790591e01,
+                -1.2427264674287118e01,
+                -9.5537423121432880e01,
+                1.6519113036542510e02,
+                -7.0209783384625098e01,
+                -1.4553592409098401e01,
+                1.2206343505203831e01,
+                -9.9929274597052196e00,
+                -8.4085595900823435e01,
+                1.4345191724964303e02,
+                -6.0636862050381758e01,
+                -1.4536130507533649e01,
+                1.2347228125716077e01,
+                -9.7159302678980044e00,
+                -8.6081002959763751e01,
+                1.4592996741513730e02,
+                -6.1523840242331410e01,
+                -1.4553592412232879e01,
+                1.2206343446986155e01,
+                -9.9929279524397305e00,
+                -8.4085594870780753e01,
+                1.4345191706222485e02,
+                -6.0636862352071532e01,
+                -1.4570766853404239e01,
+                1.2175998366492486e01,
+                -9.9905856922863112e00,
+                -8.3812185051328299e01,
+                1.4303633648493073e02,
+                -6.0469165577726159e01,
+                -1.3610717065161962e01,
+                1.0587059629986399e01,
+                -7.9068321681349163e00,
+                -7.2932404423885004e01,
+                1.2283913327111270e02,
+                -5.1646910322317169e01,
+                -1.3579708436673444e01,
+                1.0773027159520954e01,
+                -7.6175370796795425e00,
+                -7.5376833196183071e01,
+                1.2597958225245242e02,
+                -5.2797863799745748e01,
+                -1.3610717078313911e01,
+                1.0587059418306087e01,
+                -7.9068337121483454e00,
+                -7.2932400620636059e01,
+                1.2283913169238102e02,
+                -5.1646910832841897e01,
+                -1.3630368323321786e01,
+                1.0557789879027116e01,
+                -7.9007777139483810e00,
+                -7.2682825476758552e01,
+                1.2245259140017740e02,
+                -5.1489446559796768e01,
+                -1.2670671078399982e01,
+                9.0981634949263963e00,
+                -6.1383490362855788e00,
+                -6.2406844162279825e01,
+                1.0378677653422224e02,
+                -4.3402055519687693e01,
+                -1.2619333100308433e01,
+                9.3364634226935799e00,
+                -5.8491811509717584e00,
+                -6.5316414528433455e01,
+                1.0763857666200300e02,
+                -4.4841832720191050e01,
+                -1.2670671133253135e01,
+                9.0981627374157021e00,
+                -6.1383537481895356e00,
+                -6.2406830503476570e01,
+                1.0378676818216074e02,
+                -4.3402055529436716e01,
+                -1.2693036794620980e01,
+                9.0708908225804148e00,
+                -6.1281713411274001e00,
+                -6.2191660620037396e01,
+                1.0344456594081470e02,
+                -4.3260806640248063e01,
+                -1.1732979767504439e01,
+                7.7377614739662697e00,
+                -4.6587775146685351e00,
+                -5.2547655563671029e01,
+                8.6296103981829802e01,
+                -3.5891515805495345e01,
+                -1.1651721415208119e01,
+                8.0340005825064456e00,
+                -4.3852919661646119e00,
+                -5.5898160750405737e01,
+                9.0851291378134590e01,
+                -3.7622755083739385e01,
+                -1.1732979994779518e01,
+                7.7377588120662892e00,
+                -4.6587914600219875e00,
+                -5.2547607987974565e01,
+                8.6296066930227624e01,
+                -3.5891510429190419e01,
+                -1.1758218632638741e01,
+                7.7137968422318544e00,
+                -4.6438239588320966e00,
+                -5.2381405657406454e01,
+                8.6019170302439520e01,
+                -3.5774653697918737e01,
+                -1.0797063195543267e01,
+                6.5040766534586290e00,
+                -3.4402783696562169e00,
+                -4.3393478931462226e01,
+                7.0370032342568010e01,
+                -2.9105535302381853e01,
+                -1.0672637254876815e01,
+                6.8603244928014488e00,
+                -3.1995767859681346e00,
+                -4.7101348454718874e01,
+                7.5530774605740319e01,
+                -3.1094453979913311e01,
+                -1.0797064129672576e01,
+                6.5040675030570139e00,
+                -3.4403181344841500e00,
+                -4.3393319126804485e01,
+                7.0369884883020177e01,
+                -2.9105501594155889e01,
+                -1.0825134802124644e01,
+                6.4853446725127366e00,
+                -3.4195560956016346e00,
+                -4.3296381389022351e01,
+                7.0187483762520671e01,
+                -2.9024415860031247e01,
+                -9.8622468030169337e00,
+                5.3955359781222549e00,
+                -2.4558741324534137e00,
+                -3.4983728078555984e01,
+                5.6014425934291204e01,
+                -2.3035887876475471e01,
+                -9.6769173769353625e00,
+                5.8079540801032961e00,
+                -2.2635143148159220e00,
+                -3.8890523502249145e01,
+                6.1563046720547966e01,
+                -2.5198820521877391e01,
+                -9.8622505990399034e00,
+                5.3955054149765509e00,
+                -2.4559821583353774e00,
+                -3.4983216045684472e01,
+                5.6013889382190079e01,
+                -2.3035736114340502e01,
+                -9.8926597117464805e00,
+                5.3849440641688187e00,
+                -2.4279562878572039e00,
+                -3.4983707025980287e01,
+                5.5966629574570753e01,
+                -2.3006306589550750e01,
+                -8.9277749780883457e00,
+                4.4108678323349286e00,
+                -1.6793815271288624e00,
+                -2.7359655656676122e01,
+                4.3239544183593061e01,
+                -1.7676416286664047e01,
+                -8.6587749152265552e00,
+                4.8674392165289442e00,
+                -1.5450097170494306e00,
+                -3.1230915545542118e01,
+                4.8829474992442343e01,
+                -1.9874755288141955e01,
+                -8.9277901202336185e00,
+                4.4107699183102085e00,
+                -1.6796551456533098e00,
+                -2.7358123514289456e01,
+                4.3237769027728554e01,
+                -1.7675844947587926e01,
+                -8.9590559763951383e00,
+                4.4128957610428623e00,
+                -1.6423658138809611e00,
+                -2.7493743583145054e01,
+                4.3380518846300511e01,
+                -1.7719639183506050e01,
+                -7.9928164326293913e00,
+                3.5492331091008302e00,
+                -1.0852462622393610e00,
+                -2.0565792757352423e01,
+                3.2061909496398073e01,
+                -1.3023704651715642e01,
+                -7.6125412569887647e00,
+                4.0287966748633526e00,
+                -1.0084592804412351e00,
+                -2.4116992333062022e01,
+                3.7252797603904497e01,
+                -1.5077495076198684e01,
+                -7.9928747817255603e00,
+                3.5489404571097585e00,
+                -1.0858609980296849e00,
+                -2.0561701094768868e01,
+                3.2056747083970720e01,
+                -1.3021877019728107e01,
+                -8.0213899495838241e00,
+                3.5708128515175943e00,
+                -1.0368753205735253e00,
+                -2.0877831538201836e01,
+                3.2456559535389509e01,
+                -1.3165540198118645e01,
+                -7.0564174984379102e00,
+                2.8104770395789380e00,
+                -6.4821407306458223e-01,
+                -1.4652118176169953e01,
+                2.2507145963021038e01,
+                -9.0780963613608154e00,
+                -6.5338936679228468e00,
+                3.2846161494194233e00,
+                -6.1760141818709846e-01,
+                -1.7606122820367215e01,
+                2.6855555289500277e01,
+                -1.0803821410528570e01,
+                -7.0566263531717324e00,
+                2.8097184139861691e00,
+                -6.4925197579297411e-01,
+                -1.4643483271177150e01,
+                2.2495243692983838e01,
+                -9.0734373052814821e00,
+                -7.0742646195707266e00,
+                2.8621047467298468e00,
+                -5.8641470402843421e-01,
+                -1.5178915176777426e01,
+                2.3211717123277591e01,
+                -9.3414295847965061e00,
+                -6.1172231064332783e00,
+                2.1957964102200167e00,
+                -3.4265643705632465e-01,
+                -9.6769153352706798e00,
+                1.4613873405033004e01,
+                -5.8450824172251430e00,
+                -5.4212678780860326e00,
+                2.6341589573018260e00,
+                -3.4085224757280796e-01,
+                -1.1835854891340576e01,
+                1.7794701474942944e01,
+                -7.1075278532253687e00,
+                -6.1178367984533244e00,
+                2.1945528943967396e00,
+                -3.4261268423617658e-01,
+                -9.6695829134679272e00,
+                1.4600877298870854e01,
+                -5.8381668136523013e00,
+                -6.1072022151656586e00,
+                2.2922503774685161e00,
+                -2.6715334266026142e-01,
+                -1.0408120531614587e01,
+                1.5617405440391840e01,
+                -6.2270636615178061e00,
+                -5.1722074807324017e00,
+                1.7098190643016411e00,
+                -1.4098618492175408e-01,
+                -5.7061337346696464e00,
+                8.4331806866534098e00,
+                -3.3349192888568142e00,
+                -4.2766424379800121e00,
+                2.0860564217794284e00,
+                -1.5548660419053545e-01,
+                -7.0034949575065015e00,
+                1.0332245608764421e01,
+                -4.0873492185766374e00,
+                -5.1727690165421372e00,
+                1.7132539127425084e00,
+                -1.2776576793785877e-01,
+                -5.7565343018918274e00,
+                8.4941254548170697e00,
+                -3.3479852132230872e00,
+                -5.0998839330979591e00,
+                1.8678855512825561e00,
+                -5.7718910331047868e-02,
+                -6.5095346397755423e00,
+                9.5462002113817768e00,
+                -3.7632628689263172e00,
+                -4.2112469382255613e00,
+                1.3675717927787789e00,
+                -9.4961575783498800e-03,
+                -2.7877417589321136e00,
+                3.9953503912711956e00,
+                -1.5499906707437840e00,
+                -3.1046711877098376e00,
+                1.6568346830533449e00,
+                -4.5990009889900242e-02,
+                -3.3140676307068091e00,
+                4.7472200808709299e00,
+                -1.8492173878772247e00,
+                -4.1976749320353317e00,
+                1.4246952243441517e00,
+                8.7531923058200650e-02,
+                -3.0996975434049761e00,
+                4.4668738099197531e00,
+                -1.7103055321708385e00,
+                -4.0163145894665320e00,
+                1.5923303121893606e00,
+                5.8249749369824022e-02,
+                -3.3748048713195491e00,
+                4.7925769874900315e00,
+                -1.8598420111853879e00,
+                -3.1955533414298376e00,
+                1.2168024121915868e00,
+                9.9474205814620603e-02,
+                -8.6811124876189694e-01,
+                1.1994338853723501e00,
+                -4.4837238870567747e-01,
+                -1.9098914522594992e00,
+                1.3654451552507061e00,
+                2.9537044429980407e-03,
+                -9.3701125207094127e-01,
+                1.2575365835116745e00,
+                -4.7248060681970733e-01,
+                -3.0285770502890443e00,
+                1.6166340190704305e00,
+                4.8662683065338386e-01,
+                -1.2308607057515726e00,
+                1.6114560066217587e00,
+                -6.5896729332189652e-01,
+                -2.8078044229222514e00,
+                1.4555130910035559e00,
+                9.0876948497501955e-02,
+                -1.0566809618626720e00,
+                1.3938154223720176e00,
+                -5.2279617091852160e-01,
+                -1.9963264755188566e00,
+                1.3672906754961440e00,
+                2.0801988470625002e-01,
+                2.0083818728351077e-02,
+                -1.5135587406137185e-02,
+                -1.4175240342178652e-02,
+                -6.9344786794476854e-01,
+                1.2280621078720415e00,
+                1.2333381103148277e-02,
+                -1.0895386066093759e-02,
+                2.1764282171790141e-02,
+                -1.0106900291744604e-02,
+                -1.2036881930169383e00,
+                2.0482931230000392e00,
+                -1.2689218008973949e-01,
+                -5.0580690719339239e-01,
+                3.4047786101030464e-01,
+                -7.0959386937004015e-02,
+                -1.4470760938303664e00,
+                1.4285049373060201e00,
+                5.5764887956399375e-02,
+                -2.9461990750009881e-02,
+                2.3005167601875431e-02,
+                -1.0760396189439407e-02,
+                -4.3024292433642597e-01,
+                1.7121633497582587e00,
+                3.5705413032693957e-02,
+                -9.9216800479772127e-01,
+                1.5115432403429119e00,
+                -6.3985596276149748e-01,
+                5.4770961684437192e-01,
+                1.2565653391084903e00,
+                9.1639130181564755e-03,
+                -6.8547618650262643e-01,
+                1.2037212931265591e00,
+                -5.1526772142324506e-01,
+                4.8142431677326969e-01,
+                1.2842025505965851e00,
+                -3.1103960497811806e-01,
+                -3.8667287940463613e-01,
+                9.2663039525338942e-01,
+                -4.1330437951972537e-01,
+                1.9976512094478704e-02,
+                1.4898674304290889e00,
+                -2.1940405767858565e-03,
+                -8.0791207141984167e-01,
+                1.3979310081478775e00,
+                -5.9845265079421794e-01,
+                1.1971451112382212e00,
+                1.6539633089946477e00,
+                -2.7009878691796618e-01,
+                -2.8868139196850624e00,
+                4.7294193613612734e00,
+                -1.9578020397520424e00,
+                1.8164162541717044e00,
+                1.4570111710269262e00,
+                2.2385898037164991e-02,
+                -3.1195681762439769e00,
+                4.9723722392038878e00,
+                -2.0423972644796100e00,
+                1.5812403987207633e00,
+                1.1421043858413655e00,
+                -4.4319666868952730e-02,
+                -2.3144705949527720e00,
+                3.7448930479898297e00,
+                -1.5426803544433196e00,
+                1.4992161878806018e00,
+                1.6612039136364238e00,
+                -2.2870713891204597e-02,
+                -3.4442115437939465e00,
+                5.5057190995408973e00,
+                -2.2657208348376137e00,
+                2.4658130352390710e00,
+                1.5819912227884063e00,
+                -1.3204477532594588e-01,
+                -5.7752803465671017e00,
+                9.0677018990478242e00,
+                -3.6843468204828174e00,
+                3.1062201217160963e00,
+                1.8205810727868250e00,
+                7.3942159732456811e-02,
+                -7.3418038323250947e00,
+                1.1309154676354810e01,
+                -4.5733470083866452e00,
+                2.5667672162869133e00,
+                1.3762236869878626e00,
+                5.4823291778512563e-02,
+                -5.5558964069977943e00,
+                8.5620133672289516e00,
+                -3.4575259608624478e00,
+                2.9333361085351610e00,
+                1.9771000784477066e00,
+                2.1600903596218385e-02,
+                -7.7786452012965430e00,
+                1.2026327126407146e01,
+                -4.8722408979121159e00,
+                3.5238342146994350e00,
+                1.8411341262124141e00,
+                1.0485737443151430e-01,
+                -1.0316470080846322e01,
+                1.5628354265192609e01,
+                -6.2547428286449396e00,
+                4.3947471898784478e00,
+                2.3129375587624681e00,
+                1.6998863701958250e-01,
+                -1.3069120913924280e01,
+                1.9764673064124775e01,
+                -7.9234176878170990e00,
+                3.5464051944219954e00,
+                1.7786047141550632e00,
+                1.8395466553434961e-01,
+                -1.0256713338978345e01,
+                1.5450540198835597e01,
+                -6.1709943751208902e00,
+                4.3074781177775723e00,
+                2.4284702978185178e00,
+                1.2121907902830774e-01,
+                -1.3510697720561426e01,
+                2.0490823414440431e01,
+                -8.2265504110307699e00,
+                4.5269670710447079e00,
+                2.3411415500822019e00,
+                3.7814443659878427e-01,
+                -1.6533454371385766e01,
+                2.4532574055181296e01,
+                -9.7222898630871342e00,
+                5.6498078480438974e00,
+                2.8871559084424092e00,
+                3.1648740182441881e-01,
+                -1.9832336139347099e01,
+                2.9630584562783888e01,
+                -1.1804975183138390e01,
+                4.5317970588477650e00,
+                2.3235629480266455e00,
+                4.0711209040396701e-01,
+                -1.6523611973754900e01,
+                2.4482080409856291e01,
+                -9.6968326211377835e00,
+                5.6107427774726322e00,
+                2.9693568967987254e00,
+                2.6856229367890733e-01,
+                -2.0186235796983127e01,
+                3.0228033555488111e01,
+                -1.2057362656117963e01,
+                5.5230828784340904e00,
+                3.0159142144119913e00,
+                7.5032702265793638e-01,
+                -2.4452361306480910e01,
+                3.5745746299744695e01,
+                -1.4059387633540990e01,
+                6.8467243986091164e00,
+                3.5205846294935204e00,
+                5.5323452910250115e-01,
+                -2.7424447720726722e01,
+                4.0542113968978946e01,
+                -1.6058340606199877e01,
+                5.5241079122419858e00,
+                3.0111097413061287e00,
+                7.6043241689918206e-01,
+                -2.4453330947201032e01,
+                3.5733842835424838e01,
+                -1.4052622761934279e01,
+                6.8330970703372866e00,
+                3.5730950345697865e00,
+                5.0442967447855436e-01,
+                -2.7630302835415993e01,
+                4.0921397061842079e01,
+                -1.6223699529825666e01,
+                6.5233214752268127e00,
+                3.8455313715589599e00,
+                1.2738445662734672e00,
+                -3.4142511056048967e01,
+                4.9288751118195229e01,
+                -1.9258816488331760e01,
+                7.9798691992574877e00,
+                4.2304633704347614e00,
+                9.4916911879724064e-01,
+                -3.6082800915305256e01,
+                5.2740474636382487e01,
+                -2.0757970588732530e01,
+                6.5235391967368317e00,
+                3.8442392655293900e00,
+                1.2772689685023881e00,
+                -3.4144245582802192e01,
+                4.9286600694030149e01,
+                -1.9257235266278844e01,
+                7.9780164759860508e00,
+                4.2581364755189171e00,
+                9.0490824102641643e-01,
+                -3.6146890048111374e01,
+                5.2902251888236343e01,
+                -2.0834714063750525e01,
+                7.5301209868737518e00,
+                4.8266093670811516e00,
+                1.9906532239804082e00,
+                -4.5696171225139402e01,
+                6.5222794336738914e01,
+                -2.5330008845677121e01,
+                9.0592048208341964e00,
+                5.0524444639807982e00,
+                1.5639083038511417e00,
+                -4.6227354827270197e01,
+                6.6742768625790532e01,
+                -2.6090733281390481e01,
+                7.5301672757177256e00,
+                4.8262668988539703e00,
+                1.9917837214882572e00,
+                -4.5697152262800707e01,
+                6.5222641787790508e01,
+                -2.5329699752317662e01,
+                9.0617089689058279e00,
+                5.0627200474303731e00,
+                1.5306087886050987e00,
+                -4.6201245261995687e01,
+                6.6753711704174307e01,
+                -2.6103836713323240e01,
+                8.5439978438576958e00,
+                5.9605352581937785e00,
+                2.9388171122244109e00,
+                -5.9213652478598007e01,
+                8.3623964589400401e01,
+                -3.2288651007290504e01,
+                1.0100238105795977e01,
+                6.0156046860821641e00,
+                2.4311227628788585e00,
+                -5.8189717323516248e01,
+                8.2972590004142106e01,
+                -3.2212869674305303e01,
+                8.5440076687321067e00,
+                5.9604459430021439e00,
+                2.9391801366526531e00,
+                -5.9214078468041464e01,
+                8.3624068891376510e01,
+                -3.2288610777657510e01,
+                1.0103667533796683e01,
+                6.0158650887345448e00,
+                2.4107760944314816e00,
+                -5.8125625048064265e01,
+                8.2906979417176174e01,
+                -3.2191629006406409e01,
+                9.5650113177877785e00,
+                7.2498153679976820e00,
+                4.1551371399277919e00,
+                -7.4795843598083408e01,
+                1.0457037732454131e02,
+                -4.0151433068943419e01,
+                1.1116968561077568e01,
+                7.1347098863330896e00,
+                3.5688140741297674e00,
+                -7.2151486218593305e01,
+                1.0165680693075836e02,
+                -3.9206269356622016e01,
+                9.5650133940644455e00,
+                7.2497924894015711e00,
+                4.1552503042122613e00,
+                -7.4796005009548836e01,
+                1.0457044971811401e02,
+                -4.0151435976986221e01,
+                1.1120034079668221e01,
+                7.1303147700774092e00,
+                3.5594873892317103e00,
+                -7.2082067018068685e01,
+                1.0156598726189708e02,
+                -3.9171834664292227e01,
+                1.0593064483227742e01,
+                8.6969028070512202e00,
+                5.6755396034912966e00,
+                -9.2539537763180832e01,
+                1.2813560149579646e02,
+                -4.8933613418447223e01,
+                1.2119543877083460e01,
+                8.4137603187360543e00,
+                4.9925034366798311e00,
+                -8.8194505075704640e01,
+                1.2287993196505218e02,
+                -4.7096724506223822e01,
+                1.0593064919257221e01,
+                8.6968970567044934e00,
+                5.6755738143875760e00,
+                -9.2539593640863643e01,
+                1.2813563331215474e02,
+                -4.8933618162805772e01,
+                1.2121921818513506e01,
+                8.4078642204619420e00,
+                4.9908632634858190e00,
+                -8.8134432374832016e01,
+                1.2279086550380391e02,
+                -4.7060844505587738e01,
+                1.1627957207938659e01,
+                1.0303707615441018e01,
+                7.5344011042552923e00,
+                -1.1253294830348190e02,
+                1.5438372244089408e02,
+                -5.8647453529357783e01,
+                1.3114510015623049e01,
+                9.8513572940713416e00,
+                6.7213349376406626e00,
+                -1.0635738219113546e02,
+                1.4665751311861146e02,
+                -5.5881528760137869e01,
+                1.1627957298834614e01,
+                1.0303706197478814e01,
+                7.5344111366673712e00,
+                -1.1253296638384563e02,
+                1.5438373415898508e02,
+                -5.8647455853629580e01,
+                1.3116237925845430e01,
+                9.8455331102145145e00,
+                6.7243141059359051e00,
+                -1.0631074264006560e02,
+                1.4658112805680690e02,
+                -5.5849452095162235e01,
+                1.2669386535689361e01,
+                1.2071287030293307e01,
+                9.7633555455962835e00,
+                -1.3485075345900265e02,
+                1.8336444946299886e02,
+                -6.9300787627414508e01,
+                1.4105804414673191e01,
+                1.1444289269702800e01,
+                8.7789794745243590e00,
+                -1.2666835962860844e02,
+                1.7298274034188972e02,
+                -6.5547771558832267e01,
+                1.2669386554490638e01,
+                1.2071286687068984e01,
+                9.7633584027450482e00,
+                -1.3485075900242089e02,
+                1.8336445335820781e02,
+                -6.9300788508071975e01,
+                1.4107018463574896e01,
+                1.1439185153305873e01,
+                8.7843335749580440e00,
+                -1.2663444344319166e02,
+                1.7292158897636148e02,
+                -6.5521162694327174e01,
+                1.3716937488160630e01,
+                1.3999597459400730e01,
+                1.2389915672436279e01,
+                -1.5954894249539399e02,
+                2.1510813446746886e02,
+                -8.0895567204040049e01,
+                1.5095682313349364e01,
+                1.3189272906323732e01,
+                1.1192627051714643e01,
+                -1.4915916817312757e02,
+                2.0184825850919157e02,
+                -7.6081293415969839e01,
+                1.3716937492019641e01,
+                1.3999597377767842e01,
+                1.2389916464009524e01,
+                -1.5954894412085929e02,
+                2.1510813567394996e02,
+                -8.0895567498068928e01,
+                1.5096520030681436e01,
+                1.3185064407456906e01,
+                1.1198910160279951e01,
+                -1.4913565617175487e02,
+                2.0180124290250004e02,
+                -7.6060129778156622e01,
+                1.4770075388032444e01,
+                1.6087303167766446e01,
+                1.5436222950666867e01,
+                -1.8666021493779203e02,
+                2.4962122089688103e02,
+                -9.3426463524457304e01,
+                1.6085379191481852e01,
+                1.5083589447287226e01,
+                1.3991739427782750e01,
+                -1.7386892459375579e02,
+                2.3325385095807121e02,
+                -8.7470099643500802e01,
+                1.4770075388818769e01,
+                1.6087303148664304e01,
+                1.5436223164442264e01,
+                -1.8666021539675981e02,
+                2.4962122125116741e02,
+                -9.3426463615076329e01,
+                1.6085951551006787e01,
+                1.5080238931969067e01,
+                1.3998101278449143e01,
+                -1.7385331837944693e02,
+                2.3321864790104019e02,
+                -8.7453697552144448e01,
+                1.5828143941097450e01,
+                1.8331670220961666e01,
+                1.8918268274003861e01,
+                -2.1619095210442941e02,
+                2.8688297635978756e02,
+                -1.0687973526499771e02,
+                1.7075534787366465e01,
+                1.7125200136366264e01,
+                1.7207074959934751e01,
+                -2.0084388544719391e02,
+                2.6720765911058965e02,
+                -9.9705133726570395e01,
+                1.5828143941256627e01,
+                1.8331670216557445e01,
+                1.8918268330404022e01,
+                -2.1619095222989833e02,
+                2.8688297645950814e02,
+                -1.0687973529137253e02,
+                1.7075923730873765e01,
+                1.7122590193964911e01,
+                1.7213058024904747e01,
+                -2.0083402645820061e02,
+                2.6718180837697332e02,
+                -9.9692640534772679e01,
+                1.6890371426423382e01,
+                2.0728579569842751e01,
+                2.2845917469463828e01,
+                -2.4812083435502871e02,
+                3.2684448823688496e02,
+                -1.2123263616047282e02,
+                1.8066449820492846e01,
+                1.9312661524160735e01,
+                2.0870036016187061e01,
+                -2.3013589616073858e02,
+                3.0372498377642154e02,
+                -1.1277999824352135e02,
+                1.6890371426455424e01,
+                2.0728579568840633e01,
+                2.2845917484032956e01,
+                -2.4812083438838550e02,
+                3.2684448826399682e02,
+                -1.2123263616782057e02,
+                1.8066713333743454e01,
+                1.9310657703202459e01,
+                2.0875423564416035e01,
+                -2.3013008228413184e02,
+                3.0370630494679148e02,
+                -1.1277060230387309e02,
+                1.7955886187113396e01,
+                2.3272683588860026e01,
+                2.7223982220959247e01,
+                -2.8240595076334000e02,
+                3.6943078590316281e02,
+                -1.3645364576977221e02,
+                1.9058236733002300e01,
+                2.1644988962398710e01,
+                2.5012267757287322e01,
+                -2.6180071928343307e02,
+                3.4282650121799617e02,
+                -1.2669036882336400e02,
+                1.7955886187119816e01,
+                2.3272683588634656e01,
+                2.7223982224651898e01,
+                -2.8240595077199526e02,
+                3.6943078591032139e02,
+                -1.3645364577174797e02,
+                1.9058414960148450e01,
+                2.1643466247439289e01,
+                2.5016983354038196e01,
+                -2.6179767020610126e02,
+                3.4281320617581565e02,
+                -1.2668337355331974e02,
+                1.9023741366983238e01,
+                2.5957710504548576e01,
+                3.2054387652193789e01,
+                -3.1898571318422574e02,
+                4.1454655650462962e02,
+                -1.5250373535684176e02,
+                2.0050906563887416e01,
+                2.4121527381838824e01,
+                2.9665428981325245e01,
+                -2.9589665055055406e02,
+                3.8453661583827250e02,
+                -1.4143340987287985e02,
+                1.9023741366984520e01,
+                2.5957710504498362e01,
+                3.2054387653114766e01,
+                -3.1898571318642672e02,
+                4.1454655650647550e02,
+                -1.5250373535735841e02,
+                2.0051026978020587e01,
+                2.4120379273875816e01,
+                2.9669474257430963e01,
+                -2.9589543070583102e02,
+                3.8452729731205977e02,
+                -1.4142824748467820e02,
+                2.0092947487287756e01,
+                2.8776895490568755e01,
+                3.7339233558876920e01,
+                -9.8781982607414882e00,
+                7.0916635282296292e-01,
+                -1.2340880155534291e-02,
+                2.1044418341890132e01,
+                2.6741847681518077e01,
+                3.4861073630499796e01,
+                -9.1700568642165461e00,
+                6.5220324713443967e-01,
+                -1.1045071585279443e-02,
+                2.0092947487288011e01,
+                2.8776895490557653e01,
+                3.7339233559103448e01,
+                -9.8781982608033179e00,
+                7.0916635282857932e-01,
+                -1.2340880155703077e-02,
+                2.1044499630877905e01,
+                2.6740987496092696e01,
+                3.4864491165514394e01,
+                -9.1707199731434574e00,
+                6.5223741134844682e-01,
+                -1.1045188698410773e-02,
+                2.1162510215379026e01,
+                3.1723491960797684e01,
+                4.3084295875067085e01,
+                -4.1033675985379521e00,
+                -6.6095139594000130e-01,
+                6.0977735530407223e-02,
+                2.2038706806958309e01,
+                2.9505670300337073e01,
+                4.0630600131872811e01,
+                -2.7905442844326718e00,
+                -8.3885972791335117e-01,
+                6.8309956404426039e-02,
+                2.1162510215379076e01,
+                3.1723491960795304e01,
+                4.3084295875120795e01,
+                -4.1033675985539224e00,
+                -6.6095139593840913e-01,
+                6.0977735530354210e-02,
+                2.2038761643178379e01,
+                2.9505029336592230e01,
+                4.0633451796171073e01,
+                -2.7913314472201640e00,
+                -8.3878528163749511e-01,
+                6.8307595298566767e-02,
+                3.1719012432820758e01,
+                6.7480322661109355e01,
+                1.3318978565899991e02,
+                -1.6791944323404795e01,
+                -1.0181217992701848e00,
+                1.2989592638281225e-01,
+                3.2009499874031789e01,
+                6.5013296175889408e01,
+                1.3669799889514238e02,
+                -1.7009031615065428e01,
+                -1.0689880784706638e00,
+                1.3388972346122466e-01,
+                3.1719012432820758e01,
+                6.7480322661109355e01,
+                1.3318978565899991e02,
+                -1.6791944323404795e01,
+                -1.0181217992701848e00,
+                1.2989592638281225e-01,
+                3.2009500887769519e01,
+                6.5013269472322307e01,
+                1.3669829238273672e02,
+                -1.7009116366540379e01,
+                -1.0689798256828462e00,
+                1.3388945486998777e-01,
+                4.1931127118492086e01,
+                1.1600186087954401e02,
+                3.1751764022286790e02,
+                -4.6438894455748802e01,
+                -8.7599401950869438e-01,
+                2.2297105562740663e-01,
+                4.2002297497564768e01,
+                1.1479764873768737e02,
+                3.2393143797302810e02,
+                -4.7847299173836262e01,
+                -7.8150712905299369e-01,
+                2.2131248436241077e-01,
+                4.1931127118492086e01,
+                1.1600186087954401e02,
+                3.1751764022286790e02,
+                -4.6438894455748802e01,
+                -8.7599401950869438e-01,
+                2.2297105562740663e-01,
+                4.2002297514594851e01,
+                1.1479764793294436e02,
+                3.2393145467669495e02,
+                -4.7847304068128608e01,
+                -7.8150664807362491e-01,
+                2.2131246858403722e-01,
+                5.1984670105634827e01,
+                1.7926303194781252e02,
+                6.2846495111925287e02,
+                -1.0034649475039414e02,
+                2.4606292097951082e-01,
+                3.3256752105517051e-01,
+                5.2000554052128159e01,
+                1.7883235795593501e02,
+                6.3273302895025176e02,
+                -1.0138733878813618e02,
+                3.2804187851642969e-01,
+                3.3055293107858102e-01,
+                5.1984670105634827e01,
+                1.7926303194781252e02,
+                6.2846495111925287e02,
+                -1.0034649475039414e02,
+                2.4606292097951082e-01,
+                3.3256752105517051e-01,
+                5.2000554052402805e01,
+                1.7883235793562420e02,
+                6.3273302962903426e02,
+                -1.0138733898825184e02,
+                3.2804189825766372e-01,
+                3.3055293042886030e-01,
+                6.1996666427075382e01,
+                2.5724136589119979e02,
+                1.0913830717468406e03,
+                -1.8317243758181812e02,
+                2.5193786568880601e00,
+                4.6277932792022042e-01,
+                6.2000133522892554e01,
+                2.5710536851489377e02,
+                1.0934673032018356e03,
+                -1.8370056934287794e02,
+                2.5630609198690104e00,
+                4.6162176037505448e-01,
+                6.1996666427075382e01,
+                2.5724136589119979e02,
+                1.0913830717468406e03,
+                -1.8317243758181812e02,
+                2.5193786568880601e00,
+                4.6277932792022042e-01,
+                6.2000133522896938e01,
+                2.5710536851442714e02,
+                1.0934673032246803e03,
+                -1.8370056934963364e02,
+                2.5630609205366826e00,
+                4.6162176035304603e-01,
+                7.1999279107664492e01,
+                3.4965254984584158e02,
+                1.7356304176273381e03,
+                -3.0063395678020430e02,
+                6.2079056750108883e00,
+                6.1505333334154833e-01,
+                7.2000032172982571e01,
+                3.4961232791697932e02,
+                1.7365043785874466e03,
+                -3.0086002522613632e02,
+                6.2270725229979789e00,
+                6.1452738833821030e-01,
+                7.1999279107664492e01,
+                3.4965254984584158e02,
+                1.7356304176273381e03,
+                -3.0063395678020430e02,
+                6.2079056750108883e00,
+                6.1505333334154833e-01,
+                7.2000032172982642e01,
+                3.4961232791696904e02,
+                1.7365043785881401e03,
+                -3.0086002522634379e02,
+                6.2270725230187063e00,
+                6.1452738833751985e-01,
+                8.1999844359310714e01,
+                4.5636323545227941e02,
+                2.5918884526432239e03,
+                -4.5885344883307727e02,
+                1.1616256691917803e01,
+                7.8948404417119522e-01,
+                8.2000007751936337e01,
+                4.5635184072744744e02,
+                2.5922210189842476e03,
+                -4.5894061525528980e02,
+                1.1623761628208563e01,
+                7.8927378661620728e-01,
+                8.1999844359310714e01,
+                4.5636323545227941e02,
+                2.5918884526432239e03,
+                -4.5885344883307727e02,
+                1.1616256691917803e01,
+                7.8948404417119522e-01,
+                8.2000007751936337e01,
+                4.5635184072744744e02,
+                2.5922210189842476e03,
+                -4.5894061525528980e02,
+                1.1623761628208563e01,
+                7.8927378661620728e-01,
+            ],
+            dtype=dtype,
+            device=env.DEVICE,
+        ).reshape(8, 174)  # 1392
+        self.table_info_tensor = torch.tensor(
+            [
+                -2.1000000000000000e01,
+                2.1000000000000000e01,
+                1.0500000000000000e02,
+                1.0000000000000000e00,
+                1.0000000000000000e01,
+                -1.0000000000000000e00,
+            ],
+            dtype=dtype,
+            device="cpu",
+        )
+        self.em_x_tensor = torch.tensor(
+            [
+                9.3816147034272368e-01,
+                -1.6703373029862567e-01,
+                -4.4294526064601734e-02,
+                -2.8798505489184573e-01,
+                -1.6703373029862567e-01,
+                9.2489218226366088e-01,
+                -2.8928196536572048e-01,
+                -4.7833509099876154e-01,
+                -4.4294526064601734e-02,
+                -2.8928196536572048e-01,
+                5.7034320185695120e-01,
+                1.8771147911830000e-01,
+                -2.8798505489184573e-01,
+                -4.7833509099876154e-01,
+                1.8771147911830000e-01,
+                4.0174654365823070e-01,
+                8.4370316144902313e-01,
+                -3.7813146789689916e-02,
+                -3.6989397568296523e-01,
+                -4.0554075086539937e-01,
+                -3.7813146789689916e-02,
+                6.5766402633747112e-01,
+                -4.2312966361682885e-01,
+                1.2685067374257861e-01,
+                -3.6989397568296523e-01,
+                -4.2312966361682885e-01,
+                6.0171576901660107e-01,
+                9.8283160997298613e-02,
+                -4.0554075086539937e-01,
+                1.2685067374257861e-01,
+                9.8283160997298613e-02,
+                2.1324148100625978e-01,
+                9.7843596341516559e-01,
+                -1.0492833888237871e-01,
+                -1.0538688914576379e-01,
+                -2.0453551592353389e-01,
+                -1.0492833888237871e-01,
+                7.7943976693565231e-01,
+                -1.5898500035781410e-01,
+                9.4834209331437741e-02,
+                -1.0538688914576379e-01,
+                -1.5898500035781410e-01,
+                7.4778071691708869e-01,
+                -6.1895255142095873e-01,
+                -2.0453551592353389e-01,
+                9.4834209331437741e-02,
+                -6.1895255142095873e-01,
+                6.0844713798743799e-01,
+                1.0079020879244640e00,
+                -2.3855984150631487e-01,
+                -3.4608276043004524e-02,
+                -4.7448768267289088e-01,
+                -2.3855984150631487e-01,
+                4.9732018171028253e-01,
+                -3.1320787082485729e-01,
+                -1.4528004145602180e-01,
+                -3.4608276043004524e-02,
+                -3.1320787082485729e-01,
+                4.7696729363954582e-01,
+                1.1723268074231248e-01,
+                -4.7448768267289088e-01,
+                -1.4528004145602180e-01,
+                1.1723268074231248e-01,
+                4.0511515406019899e-01,
+            ],
+            dtype=dtype,
+            device=env.DEVICE,
+        ).reshape(4, 16)  # 64
+        self.em_tensor = self.em_x_tensor.reshape(4, 4, 4)
+        self.table_info_tensor.requires_grad = False
+        self.table_tensor.requires_grad = False
+        self.em_x_tensor.requires_grad = True
+        self.em_tensor.requires_grad = True
+
+        # Shape hints; of these, only last_layer_size is passed to the op in the tests below
+        self.last_layer_size = 4
+        self.nnei_i = 4
+        self.nnei_j = 4
+
+        self.expected_descriptor_tensor = torch.tensor(
+            [
+                -2.079966061311932624e00,
+                -7.692086176232872941e-01,
+                -1.330535805782471437e00,
+                -1.535049115492904015e00,
+                -3.387998402723848113e00,
+                -2.128016321720361592e00,
+                -3.277106691660685023e00,
+                -3.038592571524457941e00,
+                -3.249069620732517372e00,
+                -1.970139560184089200e00,
+                -3.099042807698376478e00,
+                -2.871870824895611740e00,
+                -3.505396485462592526e00,
+                -2.262865452602483174e00,
+                -3.422716794166150045e00,
+                -3.179802052814081126e00,
+                -3.387998402723848113e00,
+                -2.128016321720361592e00,
+                -3.277106691660685023e00,
+                -3.038592571524457941e00,
+                -2.097644546971316970e00,
+                -7.853817517282856153e-01,
+                -1.357716376463428354e00,
+                -1.553790419377535814e00,
+                -3.506551159314416299e00,
+                -2.264198868197136782e00,
+                -3.424124719375644776e00,
+                -3.181191625971385051e00,
+                -3.660169689351369549e00,
+                -2.443224371314374199e00,
+                -3.605953674751581683e00,
+                -3.365824124673929774e00,
+                -3.249069620732517372e00,
+                -1.970139560184089200e00,
+                -3.099042807698376478e00,
+                -2.871870824895611740e00,
+                -3.506551159314416299e00,
+                -2.264198868197136782e00,
+                -3.424124719375644776e00,
+                -3.181191625971385051e00,
+                -2.530401019792652750e00,
+                -1.199448737241302965e00,
+                -2.045857780281324256e00,
+                -2.028209748272746360e00,
+                -2.967997665475758939e00,
+                -1.658223979429474859e00,
+                -2.714263642699213275e00,
+                -2.536766176574209553e00,
+                -3.505396485462592526e00,
+                -2.262865452602483174e00,
+                -3.422716794166150045e00,
+                -3.179802052814081126e00,
+                -3.660169689351369549e00,
+                -2.443224371314374199e00,
+                -3.605953674751581683e00,
+                -3.365824124673929774e00,
+                -2.967997665475758939e00,
+                -1.658223979429474859e00,
+                -2.714263642699213275e00,
+                -2.536766176574209553e00,
+                -2.720389222561622855e00,
+                -1.393795402757894042e00,
+                -2.345287611062552902e00,
+                -2.246069186600956336e00,
+                -2.203412666452015500e00,
+                -8.832922757908097111e-01,
+                -1.522624038797550927e00,
+                -1.666955513084583895e00,
+                -3.241305103472522209e00,
+                -1.961375736752476673e00,
+                -3.088893233422514317e00,
+                -2.862569539842237276e00,
+                -3.574866904199356821e00,
+                -2.343386670122550530e00,
+                -3.506415378108253655e00,
+                -3.263388258370226502e00,
+                -3.603390794940432240e00,
+                -2.376642828344087466e00,
+                -3.540123958621633538e00,
+                -3.297678764698442677e00,
+                -3.241305103472522209e00,
+                -1.961375736752476673e00,
+                -3.088893233422514317e00,
+                -2.862569539842237276e00,
+                -2.429999277303452665e00,
+                -1.100023053295984932e00,
+                -1.884632599239140616e00,
+                -1.915409451705063049e00,
+                -3.617225590985869044e00,
+                -2.392818241566808002e00,
+                -3.556322178869617989e00,
+                -3.314299042117806238e00,
+                -3.041076611338295876e00,
+                -1.738238794499435524e00,
+                -2.817792369079202164e00,
+                -2.623522438271889357e00,
+                -3.574866904199356821e00,
+                -2.343386670122550530e00,
+                -3.506415378108253655e00,
+                -3.263388258370226502e00,
+                -3.617225590985869044e00,
+                -2.392818241566808002e00,
+                -3.556322178869617989e00,
+                -3.314299042117806238e00,
+                -2.494628058196212361e00,
+                -1.163767319481099349e00,
+                -1.988521437257453117e00,
+                -1.987826554800365741e00,
+                -3.075717632972006399e00,
+                -1.776439224176796783e00,
+                -2.866012767430887731e00,
+                -2.664752088723873147e00,
+                -3.603390794940432240e00,
+                -2.376642828344087466e00,
+                -3.540123958621633538e00,
+                -3.297678764698442677e00,
+                -3.041076611338295876e00,
+                -1.738238794499435524e00,
+                -2.817792369079202164e00,
+                -2.623522438271889357e00,
+                -3.075717632972006399e00,
+                -1.776439224176796783e00,
+                -2.866012767430887731e00,
+                -2.664752088723873147e00,
+                -2.937692581645545253e00,
+                -1.625281447224701248e00,
+                -2.670609348993408627e00,
+                -2.500890937682363369e00,
+                -2.025708717938600856e00,
+                -7.199180117105589138e-01,
+                -1.247903242801984502e00,
+                -1.477847643583491077e00,
+                -3.319906298979989234e00,
+                -2.050399877432793616e00,
+                -3.190625595262678527e00,
+                -2.956812988881384996e00,
+                -3.320427161433143759e00,
+                -2.050991936909778435e00,
+                -3.191292681270106346e00,
+                -2.957438103019588649e00,
+                -3.426553578385282073e00,
+                -2.172154331535164129e00,
+                -3.325432621990271009e00,
+                -3.084944527573738160e00,
+                -3.319906298979989234e00,
+                -2.050399877432793616e00,
+                -3.190625595262678527e00,
+                -2.956812988881384996e00,
+                -2.284064566837665566e00,
+                -9.593000707401009297e-01,
+                -1.650522869993226838e00,
+                -1.754438009603198623e00,
+                -3.379465923571113084e00,
+                -2.118266849034519783e00,
+                -3.266349102610927257e00,
+                -3.028338829309447533e00,
+                -3.079911054493713163e00,
+                -1.781075057846572385e00,
+                -2.871812857785094764e00,
+                -2.669747195759931468e00,
+                -3.320427161433143759e00,
+                -2.050991936909778435e00,
+                -3.191292681270106346e00,
+                -2.957438103019588649e00,
+                -3.379465923571113084e00,
+                -2.118266849034519783e00,
+                -3.266349102610927257e00,
+                -3.028338829309447533e00,
+                -2.322820890996963428e00,
+                -9.962581166315247483e-01,
+                -1.712462166775041483e00,
+                -1.796847406967182925e00,
+                -3.773974160346314832e00,
+                -2.578147069133936320e00,
+                -3.733153897787112818e00,
+                -3.501723916740695497e00,
+                -3.426553578385282073e00,
+                -2.172154331535164129e00,
+                -3.325432621990271009e00,
+                -3.084944527573738160e00,
+                -3.079911054493713163e00,
+                -1.781075057846572385e00,
+                -2.871812857785094764e00,
+                -2.669747195759931468e00,
+                -3.773974160346314832e00,
+                -2.578147069133936320e00,
+                -3.733153897787112818e00,
+                -3.501723916740695497e00,
+                -2.486913710376590814e00,
+                -1.156110267723822860e00,
+                -1.976136357370776642e00,
+                -1.979145885578800446e00,
+                -1.985509025147957018e00,
+                -6.837428483499845555e-01,
+                -1.187510572446436763e00,
+                -1.435784454536765731e00,
+                -3.459824255787086411e00,
+                -2.210355755872039030e00,
+                -3.366749809979232122e00,
+                -3.124964584278840274e00,
+                -3.237455343632644311e00,
+                -1.957033195714938278e00,
+                -3.083852092710132720e00,
+                -2.857958563155091891e00,
+                -3.657182553291094873e00,
+                -2.439708494346079171e00,
+                -3.602533119650989324e00,
+                -3.362243802860200859e00,
+                -3.459824255787086411e00,
+                -2.210355755872039030e00,
+                -3.366749809979232122e00,
+                -3.124964584278840274e00,
+                -2.612858501561634128e00,
+                -1.282799717023056063e00,
+                -2.177105827922439119e00,
+                -2.122088127377702360e00,
+                -3.527506999611477845e00,
+                -2.288426034556684474e00,
+                -3.449583412378998126e00,
+                -3.206410310763728511e00,
+                -3.364731487733799042e00,
+                -2.101446556578417724e00,
+                -3.247719091239923639e00,
+                -3.010635929634002395e00,
+                -3.237455343632644311e00,
+                -1.957033195714938278e00,
+                -3.083852092710132720e00,
+                -2.857958563155091891e00,
+                -3.527506999611477845e00,
+                -2.288426034556684474e00,
+                -3.449583412378998126e00,
+                -3.206410310763728511e00,
+                -2.635737441843280848e00,
+                -1.306200303853801703e00,
+                -2.213214111467705525e00,
+                -2.148325634854432398e00,
+                -3.052719273633249131e00,
+                -1.751058696872038523e00,
+                -2.834060115075742292e00,
+                -2.637372572375544344e00,
+                -3.657182553291094873e00,
+                -2.439708494346079171e00,
+                -3.602533119650989324e00,
+                -3.362243802860200859e00,
+                -3.364731487733799042e00,
+                -2.101446556578417724e00,
+                -3.247719091239923639e00,
+                -3.010635929634002395e00,
+                -3.052719273633249131e00,
+                -1.751058696872038523e00,
+                -2.834060115075742292e00,
+                -2.637372572375544344e00,
+                -2.716586630714696859e00,
+                -1.389827110741064020e00,
+                -2.339411933160714607e00,
+                -2.241657223573843893e00,
+            ],
+            dtype=dtype,
+            device=env.DEVICE,
+        ).reshape(4, 4, 4, 4)
+        self.expected_dy_dem_x = torch.tensor(
+            [
+                6.020806901503564568e00,
+                4.855212102289546472e00,
+                5.539480671183889093e00,
+                4.079184573583559903e00,
+                4.855212102289546472e00,
+                6.002622647924519939e00,
+                4.071866106011439612e00,
+                3.510130130958049044e00,
+                5.539480671183889093e00,
+                4.071866106011439612e00,
+                5.377211408293748107e00,
+                5.608493562913938746e00,
+                4.079184573583559903e00,
+                3.510130130958049044e00,
+                5.608493562913938746e00,
+                5.361908159053506928e00,
+                5.856733593151207451e00,
+                5.562527732431956196e00,
+                3.696970497751218332e00,
+                3.591262868531118535e00,
+                5.562527732431956196e00,
+                5.486518130714558517e00,
+                3.554623757729209022e00,
+                5.671203227676853587e00,
+                3.696970497751218332e00,
+                3.554623757729209022e00,
+                5.409085428112309302e00,
+                5.689207994509548172e00,
+                3.591262868531118535e00,
+                5.671203227676853587e00,
+                5.689207994509548172e00,
+                5.576190638038706737e00,
+                6.061353798127412595e00,
+                5.246377090750031336e00,
+                5.243735237316045961e00,
+                4.603911062145699340e00,
+                5.246377090750031336e00,
+                5.721012903047327569e00,
+                4.908561477952471108e00,
+                5.690718304708298980e00,
+                5.243735237316045961e00,
+                4.908561477952471108e00,
+                5.654457931163397788e00,
+                3.881456101028720962e00,
+                4.603911062145699340e00,
+                5.690718304708298980e00,
+                3.881456101028720962e00,
+                5.417053162665033561e00,
+                6.074411415516496326e00,
+                4.379884232572607949e00,
+                5.573202499423647538e00,
+                3.509692162941947302e00,
+                4.379884232572607949e00,
+                5.338347197725587634e00,
+                3.943361243591621879e00,
+                4.998151625670068654e00,
+                5.573202499423647538e00,
+                3.943361243591621879e00,
+                5.336491405093880758e00,
+                5.678289531805588375e00,
+                3.509692162941947302e00,
+                4.998151625670068654e00,
+                5.678289531805588375e00,
+                5.359752988507883842e00,
+            ],
+            dtype=dtype,
+            device=env.DEVICE,
+        ).reshape(4, 16)
+
+    def test_forward(self) -> None:
+        # Run the op once; element 0 of its return value is the descriptor.
+        op_inputs = (
+            self.table_tensor,
+            self.table_info_tensor,
+            self.em_x_tensor,
+            self.em_tensor,
+            self.last_layer_size,
+        )
+        descriptor = torch.ops.deepmd.tabulate_fusion_se_t_tebd(*op_inputs)[0]
+
+        # The output shape must match the reference before comparing values.
+        self.assertEqual(descriptor.shape, self.expected_descriptor_tensor.shape)
+
+        # Values must agree within self.prec (both absolute and relative).
+        torch.testing.assert_close(
+            descriptor,
+            self.expected_descriptor_tensor,
+            rtol=self.prec,
+            atol=self.prec,
+        )
+
+
+    def test_backward(self) -> None:
+        # Run the op once; element 0 of its return value is the descriptor.
+        op_inputs = (
+            self.table_tensor,
+            self.table_info_tensor,
+            self.em_x_tensor,
+            self.em_tensor,
+            self.last_layer_size,
+        )
+
+        descriptor = torch.ops.deepmd.tabulate_fusion_se_t_tebd(*op_inputs)[0]
+
+        # Sanity-check the forward values before differentiating.
+        torch.testing.assert_close(
+            descriptor,
+            self.expected_descriptor_tensor,
+            rtol=self.prec,
+            atol=self.prec,
+        )
+
+        # Back-propagate from a scalar: the sum over all descriptor entries.
+        descriptor.sum().backward()
+
+        # A gradient must have been accumulated on em_x_tensor.
+        em_x_grad = self.em_x_tensor.grad
+        self.assertIsNotNone(em_x_grad)
+
+        # em_tensor.grad is deliberately left unchecked: the op ignores it,
+        # since em_tensor and em_x_tensor share the same data.
+
+        # The gradient must match the reference in shape ...
+        self.assertEqual(em_x_grad.shape, self.expected_dy_dem_x.shape)
+
+        # ... and in value, within self.prec (both absolute and relative).
+        torch.testing.assert_close(
+            em_x_grad,
+            self.expected_dy_dem_x,
+            rtol=self.prec,
+            atol=self.prec,
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
index da239212b0..ff4f00f912 100644
--- a/source/tests/pt/test_training.py
+++ b/source/tests/pt/test_training.py
@@ -92,7 +92,11 @@ def test_dp_train(self) -> None:
                     state_dict_trained[state_key],
                     state_dict_finetuned_empty[state_key],
                 )
-                if "fitting_net" not in state_key:
+                if (
+                    ("fitting_net" not in state_key)
+                    or ("fparam" in state_key)
+                    or ("aparam" in state_key)
+                ):
                     torch.testing.assert_close(
                         state_dict_trained[state_key],
                         state_dict_finetuned_random[state_key],
@@ -256,6 +260,7 @@ def setUp(self) -> None:
         self.config["training"]["save_freq"] = 1
         self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000"
         shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
+        self.config["model"]["data_stat_nbatch"] = 100
 
     def tearDown(self) -> None:
         (self.set_path / "fparam.npy").unlink(missing_ok=True)
diff --git a/source/tests/tf/test_nvnmd_entrypoints.py b/source/tests/tf/test_nvnmd_entrypoints.py
index eaf8bfafd5..6833b4e0f8 100644
--- a/source/tests/tf/test_nvnmd_entrypoints.py
+++ b/source/tests/tf/test_nvnmd_entrypoints.py
@@ -515,6 +515,7 @@ def test_model_qnn_v0(self) -> None:
     def tearDown(self) -> None:
         # close
         nvnmd_cfg.enable = False
+        tf.reset_default_graph()
 
 
 class TestNvnmdEntrypointsV1(tf.test.TestCase):
@@ -878,6 +879,7 @@ def test_wrap_qnn_v1(self) -> None:
     def tearDown(self) -> None:
         # close
         nvnmd_cfg.enable = False
+        tf.reset_default_graph()
 
 
 if __name__ == "__main__":
diff --git a/source/tests/tf/test_out_bias_std.py b/source/tests/tf/test_out_bias_std.py
index 5d0ca45274..a5bd4a05cc 100644
--- a/source/tests/tf/test_out_bias_std.py
+++ b/source/tests/tf/test_out_bias_std.py
@@ -7,6 +7,9 @@
 from deepmd.tf.descriptor.se_a import (
     DescrptSeA,
 )
+from deepmd.tf.env import (
+    tf,
+)
 from deepmd.tf.fit.dipole import (
     DipoleFittingSeA,
 )
@@ -21,6 +24,16 @@
 class TestOutBiasStd(unittest.TestCase):
     """Test out_bias and out_std functionality in TensorFlow backend."""
 
+    def setUp(self):
+        """Reset the default graph before each test."""
+        super().setUp()
+        tf.reset_default_graph()
+
+    def tearDown(self):
+        """Reset the default graph after each test."""
+        tf.reset_default_graph()
+        super().tearDown()
+
     def test_init_out_stat_basic(self):
         """Test basic init_out_stat functionality."""
         descriptor = DescrptSeA(