Merge pull request #1482 from qingqing01/data_convert

qingqing01 · web-flow · commit df37132945ba · 2017-03-01T15:54:27.000+08:00
Fix dataprovider_converter when using only one GPU
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
@@ -47,6 +47,9 @@ void setUseGpu(bool useGpu);
 /// Return true if this py_paddle is compiled in GPU Version
 bool isGpuVersion();
 
+/// Return FLAGS_trainer_count
+int getTrainerCount();
+
 /// The Error of IO Operation. Such as file not found, etc.
 class IOError {};
 
diff --git a/paddle/api/Util.cpp b/paddle/api/Util.cpp
@@ -54,5 +54,7 @@ bool isGpuVersion() {
 #endif
 }
 
+int getTrainerCount() { return FLAGS_trainer_count; }
+
 static_assert(NUM_PARAMETER_TYPES == paddle::NUM_PARAMETER_TYPES,
               "The Parameter Type should be same in core/api and core/common");
diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py
@@ -26,6 +26,15 @@ def __init__(self, input_type, pos):
         if not isinstance(self.input_type, dp2.InputType):
             raise ValueError("input type should be dataprovider2.InputType")
         self.pos = pos
+        # data_in_gpu indicates whether, in GPU mode, the argument should be
+        # created on the GPU. With a single thread (trainer_count=1), the
+        # trainer uses NeuralNetwork, which needs the argument to be on the
+        # GPU before the forward function is called, so data_in_gpu is True.
+        # Otherwise, the trainer uses MultiGradientMachine, which transfers
+        # the data from CPU to GPU inside the forward function, so
+        # data_in_gpu is False in that case.
+        self.data_in_gpu = swig_paddle.isUsingGpu(
+        ) and swig_paddle.getTrainerCount() == 1
 
     def scan(self, dat):
         pass
@@ -53,7 +62,8 @@ def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
         if self.__mat__.dtype != numpy.float32:
             self.__mat__ = self.__mat__.astype(numpy.float32)
-        m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False)
+        m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True,
+                                                    self.data_in_gpu)
         argument.setSlotValue(self.pos, m)
 
 
@@ -75,10 +85,13 @@ def extend_cols(self, dat):
 
     def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
-        m = swig_paddle.Matrix.createSparse(self.__height__,
-                                            self.input_type.dim,
-                                            len(self.__cols__),
-                                            len(self.__value__) == 0)
+        m = swig_paddle.Matrix.createSparse(
+            self.__height__,
+            self.input_type.dim,
+            len(self.__cols__),
+            len(self.__value__) == 0,
+            False,  # trans
+            False)  # TODO support GPU
         assert isinstance(m, swig_paddle.Matrix)
         m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
         argument.setSlotValue(self.pos, m)
@@ -102,7 +115,7 @@ def scan(self, dat):
         self.__ids__.append(dat)
 
     def finish_scan(self, argument):
-        ids = swig_paddle.IVector.create(self.__ids__)
+        ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu)
         assert isinstance(argument, swig_paddle.Arguments)
         argument.setSlotIds(self.pos, ids)
 
diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py
@@ -235,4 +235,8 @@ def test_multiple_features_tuple(self):
 
 if __name__ == '__main__':
     api.initPaddle("--use_gpu=0")
-    unittest.main()
+    suite = unittest.TestLoader().loadTestsFromTestCase(DataFeederTest)
+    unittest.TextTestRunner().run(suite)
+    if api.isGpuVersion():
+        api.setUseGpu(True)
+        unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -54,5 +54,7 @@ bool isGpuVersion() {`
`54`	`54`	`#endif`
`55`	`55`	`}`
`56`	`56`
	`57`	`+int getTrainerCount() { return FLAGS_trainer_count; }`
	`58`	`+`
`57`	`59`	`static_assert(NUM_PARAMETER_TYPES == paddle::NUM_PARAMETER_TYPES,`
`58`	`60`	`"The Parameter Type should be same in core/api and core/common");`