refine code

HydrogenSulfate · HydrogenSulfate · commit d236285db5e7 · 2025-06-19T13:40:24.000+08:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -65,13 +65,13 @@ repos:
       - id: clang-format
         exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$|.+\.json$)
   # markdown, yaml, CSS, javascript
-  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v4.0.0-alpha.8
-    hooks:
-      - id: prettier
-        types_or: [markdown, yaml, css]
-        # workflow files cannot be modified by pre-commit.ci
-        exclude: ^(source/3rdparty|\.github/workflows|\.clang-format)
+  # - repo: https://github.com/pre-commit/mirrors-prettier
+  #   rev: v4.0.0-alpha.8
+  #   hooks:
+  #     - id: prettier
+  #       types_or: [markdown, yaml, css]
+  #       # workflow files cannot be modified by pre-commit.ci
+  #       exclude: ^(source/3rdparty|\.github/workflows|\.clang-format)
   # Shell
   - repo: https://github.com/scop/pre-commit-shfmt
     rev: v3.11.0-1
@@ -83,25 +83,25 @@ repos:
     hooks:
       - id: cmake-format
       #- id: cmake-lint
-  - repo: https://github.com/njzjz/mirrors-bibtex-tidy
-    rev: v1.13.0
-    hooks:
-      - id: bibtex-tidy
-        args:
-          - --curly
-          - --numeric
-          - --align=13
-          - --blank-lines
-          # disable sort: the order of keys and fields has explict meanings
-          #- --sort=key
-          - --duplicates=key,doi,citation,abstract
-          - --merge=combine
-          #- --sort-fields
-          #- --strip-comments
-          - --trailing-commas
-          - --encode-urls
-          - --remove-empty-fields
-          - --wrap=80
+  # - repo: https://github.com/njzjz/mirrors-bibtex-tidy
+  #   rev: v1.13.0
+  #   hooks:
+  #     - id: bibtex-tidy
+  #       args:
+  #         - --curly
+  #         - --numeric
+  #         - --align=13
+  #         - --blank-lines
+  #         # disable sort: the order of keys and fields has explicit meanings
+  #         #- --sort=key
+  #         - --duplicates=key,doi,citation,abstract
+  #         - --merge=combine
+  #         #- --sort-fields
+  #         #- --strip-comments
+  #         - --trailing-commas
+  #         - --encode-urls
+  #         - --remove-empty-fields
+  #         - --wrap=80
   # license header
   - repo: https://github.com/Lucas-C/pre-commit-hooks
     rev: v1.5.5
diff --git a/deepmd/pd/train/training.py b/deepmd/pd/train/training.py
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import contextlib
 import functools
 import logging
 import time
@@ -18,6 +19,7 @@
 from paddle.distributed import (
     fleet,
 )
+from paddle.distributed.fleet.utils import hybrid_parallel_util as hpu
 from paddle.framework import (
     core,
 )
@@ -741,16 +743,27 @@ def step(_step_id, task_key="Default") -> None:
                     pref_lr = _lr.start_lr
                 else:
                     pref_lr = cur_lr
-                with nvprof_context(enable_profiling, "Forward pass"):
-                    model_pred, loss, more_loss = self.wrapper(
-                        **input_dict,
-                        cur_lr=paddle.full([], pref_lr, DEFAULT_PRECISION),
-                        label=label_dict,
-                        task_key=task_key,
-                    )
+                sync_context = (
+                    self.wrapper.no_sync
+                    if self.world_size > 1
+                    else contextlib.nullcontext
+                )
+                with sync_context():
+                    with nvprof_context(enable_profiling, "Forward pass"):
+                        model_pred, loss, more_loss = self.wrapper(
+                            **input_dict,
+                            cur_lr=paddle.full([], pref_lr, DEFAULT_PRECISION),
+                            label=label_dict,
+                            task_key=task_key,
+                        )
+
+                    with nvprof_context(enable_profiling, "Backward pass"):
+                        loss.backward()
 
-                with nvprof_context(enable_profiling, "Backward pass"):
-                    loss.backward()
+                if self.world_size > 1:
+                    # Fuse and all-reduce gradients manually before the optimizer step when using DDP with no_sync.
+                    # Details: https://github.com/PaddlePaddle/Paddle/issues/48898#issuecomment-1343838622
+                    hpu.fused_allreduce_gradients(list(self.wrapper.parameters()), None)
 
                 if self.gradient_max_norm > 0.0:
                     with nvprof_context(enable_profiling, "Gradient clip"):
diff --git a/deepmd/pd/utils/env.py b/deepmd/pd/utils/env.py
@@ -27,7 +27,8 @@
     ncpus = os.cpu_count()
 NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(0, ncpus)))
 # Make sure DDP uses correct device if applicable
-LOCAL_RANK = paddle.distributed.get_rank()
+LOCAL_RANK = os.environ.get("PADDLE_LOCAL_RANK")
+LOCAL_RANK = int(0 if LOCAL_RANK is None else LOCAL_RANK)
 
 if os.environ.get("DEVICE") == "cpu" or paddle.device.cuda.device_count() <= 0:
     DEVICE = "cpu"