@@ -44,7 +44,9 @@ struct TestReduceOpHandle {
       ctxs_[j]->Wait();
     }
 #ifdef PADDLE_WITH_CUDA
-    nccl_ctxs_->WaitAll();
+    if (nccl_ctxs_) {
+      nccl_ctxs_->WaitAll();
+    }
 #endif
   }
 
@@ -64,6 +66,7 @@ struct TestReduceOpHandle {
         gpu_list_.push_back(p);
         ctxs_.emplace_back(new p::CUDADeviceContext(p));
       }
+      nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
 #else
       PADDLE_THROW("CUDA is not support.");
 #endif
@@ -74,10 +77,10 @@ struct TestReduceOpHandle {
         gpu_list_.push_back(p);
         ctxs_.emplace_back(new p::CPUDeviceContext(p));
       }
-    }
 #ifdef PADDLE_WITH_CUDA
-    nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
+      nccl_ctxs_.reset(nullptr);
 #endif
+    }
   }
 
   void InitReduceOp(size_t input_scope_idx) {
@@ -87,15 +90,27 @@ struct TestReduceOpHandle {
     }
     local_scopes_[input_scope_idx]->Var("input");
 
+    if (use_gpu_) {
+#ifdef PADDLE_WITH_CUDA
+      op_handle_.reset(
+          new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
+#else
+      PADDLE_THROW("CUDA is not support.");
+#endif
+    } else {
 #ifdef PADDLE_WITH_CUDA
-    op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_, *nccl_ctxs_));
+      op_handle_.reset(
+          new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
 #else
-    op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_));
+      op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_));
 #endif
+    }
 
     // add input
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      if (!use_gpu_) {
+        op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      }
       vars_.emplace_back(new VarHandle());
       VarHandle *in_var_handle = static_cast<VarHandle *>(vars_.back().get());
       in_var_handle->place_ = gpu_list_[j];
@@ -236,25 +251,31 @@ TEST(ReduceTester, TestCPUReduceTestSelectedRows) {
   test_op.InitReduceOp(input_scope_idx);
   test_op.TestReduceSelectedRows(input_scope_idx);
 }
+TEST(ReduceTester, TestCPUReduceTestLodTensor) {
+  TestReduceOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitReduceOp(input_scope_idx);
+  test_op.TestReduceLodTensors(input_scope_idx);
+}
+#ifdef PADDLE_WITH_CUDA
 
-// #ifdef PADDLE_WITH_CUDA
-//
-// TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
-//   TestReduceOpHandle test_op;
-//   size_t input_scope_idx = 0;
-//   test_op.InitCtxOnGpu(true);
-//   test_op.InitReduceOp(input_scope_idx);
-//   test_op.TestReduceSelectedRows(input_scope_idx);
-// }
-//
-// TEST(ReduceTester, TestCPUReduceTestLodTensor) {
-//   TestReduceOpHandle test_op;
-//   size_t input_scope_idx = 0;
-//   test_op.InitCtxOnGpu(true);
-//   test_op.InitReduceOp(input_scope_idx);
-//   test_op.TestReduceLodTensors(input_scope_idx);
-// }
-// #endif
+TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
+  TestReduceOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitReduceOp(input_scope_idx);
+  test_op.TestReduceSelectedRows(input_scope_idx);
+}
+
+TEST(ReduceTester, TestGPUReduceTestLodTensor) {
+  TestReduceOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitReduceOp(input_scope_idx);
+  test_op.TestReduceLodTensors(input_scope_idx);
+}
+#endif
 
 }  // namespace details
 }  // namespace framework
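
Reviewer note: the heart of this patch is that nccl_ctxs_ may now be null when the test runs on CPU, so consumers take it as a raw pointer (nccl_ctxs_.get() rather than *nccl_ctxs_) and every use is guarded. A minimal standalone sketch of that idiom, using a hypothetical stand-in type rather than the real platform::NCCLContextMap API:

#include <memory>

// Stand-in for platform::NCCLContextMap; only the null-guard idiom matters.
struct FakeNcclContextMap {
  void WaitAll() {}  // would block until all per-device NCCL streams finish
};

struct Ctx {
  std::unique_ptr<FakeNcclContextMap> nccl_ctxs_;  // null on CPU-only runs

  void InitCtx(bool use_gpu) {
    if (use_gpu) {
      nccl_ctxs_.reset(new FakeNcclContextMap());  // GPU path: real map
    } else {
      nccl_ctxs_.reset(nullptr);  // CPU path: explicitly null
    }
  }

  void WaitAll() {
    if (nccl_ctxs_) {  // guard every dereference, as the patch does
      nccl_ctxs_->WaitAll();
    }
  }
};

int main() {
  Ctx cpu_ctx;
  cpu_ctx.InitCtx(false);
  cpu_ctx.WaitAll();  // safe: the guard skips the null map
  return 0;
}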