
Commit 56eaf0e

Fix ce 2.6 (#59977)
* Fix comments for PR #59644 (#59885)
* update
* update
* Fix comments for PR #59644 (#59750)
* tinyfix for PR #59644
* tinyfix
* tinyfix
* update
* update
1 parent 5c100fb · commit 56eaf0e

File tree: 3 files changed, +15 −7 lines


paddle/fluid/memory/allocation/allocator.h

Lines changed: 6 additions & 5 deletions
@@ -23,6 +23,7 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/allocator.h"
+#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/flags.h"
 
 #ifdef PADDLE_WITH_NCCL
@@ -143,22 +144,22 @@ using DecoratedAllocationPtr =
 
 template <typename T>
 static T&& FillValue(T&& allocation) {
-#if defined(PADDLE_WITH_NCCL)
+#if defined(PADDLE_WITH_CUDA)
   if (allocation != nullptr) {
     if (FLAGS_sync_after_alloc || FLAGS_alloc_fill_value >= 0) {
-      cudaDeviceSynchronize();
+      PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
       if (FLAGS_alloc_fill_value >= 0) {
         VLOG(10) << "Set " << FLAGS_alloc_fill_value << " on "
                  << allocation->ptr() << " " << allocation->place() << " "
                  << allocation->size();
         if (platform::is_gpu_place(allocation->place())) {
-          cudaMemset(
-              allocation->ptr(), FLAGS_alloc_fill_value, allocation->size());
+          PADDLE_ENFORCE_GPU_SUCCESS(cudaMemset(
+              allocation->ptr(), FLAGS_alloc_fill_value, allocation->size()));
         } else {
           std::memset(
               allocation->ptr(), FLAGS_alloc_fill_value, allocation->size());
         }
-        cudaDeviceSynchronize();
+        PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
       }
     }
   }
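
This hunk does two things: it widens the guard from PADDLE_WITH_NCCL to PADDLE_WITH_CUDA (FillValue only needs the CUDA runtime, not NCCL), and it wraps each CUDA runtime call in PADDLE_ENFORCE_GPU_SUCCESS, which is presumably why the diff also adds the paddle/phi/core/enforce.h include. The point of the wrapping is that cudaMemset and cudaDeviceSynchronize return a cudaError_t that was previously discarded, so failures went unnoticed. A minimal standalone sketch of the same pattern, using a hypothetical CHECK_CUDA macro rather than Paddle's actual one:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Hypothetical stand-in for PADDLE_ENFORCE_GPU_SUCCESS: inspect the
// cudaError_t returned by every CUDA runtime call and fail loudly.
#define CHECK_CUDA(call)                                             \
  do {                                                               \
    cudaError_t err__ = (call);                                      \
    if (err__ != cudaSuccess) {                                      \
      std::fprintf(stderr, "CUDA error %s at %s:%d\n",               \
                   cudaGetErrorString(err__), __FILE__, __LINE__);   \
      std::exit(EXIT_FAILURE);                                       \
    }                                                                \
  } while (0)

int main() {
  void* ptr = nullptr;
  CHECK_CUDA(cudaMalloc(&ptr, 1024));
  // Fill the fresh allocation, as FillValue does with FLAGS_alloc_fill_value.
  CHECK_CUDA(cudaMemset(ptr, 0xAB, 1024));
  // Synchronize so asynchronous failures surface here, not at some later call.
  CHECK_CUDA(cudaDeviceSynchronize());
  CHECK_CUDA(cudaFree(ptr));
  return 0;
}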

python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py

Lines changed: 0 additions & 2 deletions
@@ -921,8 +921,6 @@ def __init__(self, layers, hcg, strategy):
         self._virtual_pp_rank = 0
         self._reset_counter()
 
-        self._assign_vpp_info(self.model_chunks)
-
     def _check_sanity(self):
         assert (
             framework.in_dynamic_mode()

python/paddle/distributed/fleet/utils/tensor_fusion_helper.py

Lines changed: 9 additions & 0 deletions
@@ -461,6 +461,15 @@ def scale_grads(self):
 
         self._reset_params_checked_in()
 
+    @imperative_base.no_grad
+    def scale_and_split_grads(self):
+        assert self._task is not None, "Task is not initialized. "
+        self._task.wait()
+        scale_factor = 1.0 / self._comm_group.nranks
+        self.grad_storage.scale_(scale_factor)
+
+        self._reset_params_checked_in()
+
 
 def obtain_storage(
     parameters,

0 commit comments