Skip to content

Commit 54ae8e4

Browse files
authored
Merge pull request #10741 from jacquesqiao/inferencer-support-multi-gpu
Inferencer support parallel_executor
2 parents 67b8a30 + d2d671e commit 54ae8e4

File tree

6 files changed

+42
-28
lines changed

6 files changed

+42
-28
lines changed

python/paddle/fluid/inferencer.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,52 +12,68 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import contextlib
16+
1517
import core
1618

1719
import executor
1820
import framework
1921
import io
22+
import parallel_executor
2023
import unique_name
2124
from trainer import check_and_get_place
2225

2326
__all__ = ['Inferencer', ]
2427

2528

2629
class Inferencer(object):
27-
def __init__(self, infer_func, param_path, place=None):
30+
def __init__(self, infer_func, param_path, place=None, parallel=False):
2831
"""
2932
:param infer_func: a function that will return predict Variable
3033
:param param_path: the path where the inference model is saved by fluid.io.save_params
3134
:param place: place to do the inference
35+
:param parallel: if True, use parallel_executor to run the inference; it will use multiple CPUs/GPUs.
3236
"""
3337
self.param_path = param_path
3438
self.scope = core.Scope()
39+
self.parallel = parallel
40+
self.place = check_and_get_place(place)
3541

3642
self.inference_program = framework.Program()
3743
with framework.program_guard(self.inference_program):
3844
with unique_name.guard():
3945
self.predict_var = infer_func()
4046

41-
self.exe = executor.Executor(check_and_get_place(place))
42-
with executor.scope_guard(self.scope):
47+
with self._prog_and_scope_guard():
4348
# load params from param_path into scope
44-
io.load_params(self.exe, param_path, self.inference_program)
49+
io.load_params(executor.Executor(self.place), param_path)
50+
51+
if parallel:
52+
with self._prog_and_scope_guard():
53+
self.exe = parallel_executor.ParallelExecutor(
54+
use_cuda=isinstance(self.place, core.CUDAPlace),
55+
loss_name=self.predict_var.name)
56+
else:
57+
self.exe = executor.Executor(self.place)
4558

46-
def infer(self, inputs, return_numpy=True):
59+
def infer(self, inputs):
4760
"""
4861
:param inputs: a map of {"input_name": input_var} that will be fed into the inference program
4962
to get the predict value
50-
:param return_numpy: if return numpy value for row tensor
5163
:return: the predict value of the inference model
5264
"""
5365
if not isinstance(inputs, dict):
5466
raise ValueError(
5567
"inputs should be a map of {'input_name': input_var}")
5668

57-
with executor.scope_guard(self.scope):
58-
results = self.exe.run(self.inference_program,
59-
feed=inputs,
60-
fetch_list=[self.predict_var],
61-
return_numpy=return_numpy)
69+
with self._prog_and_scope_guard():
70+
results = self.exe.run(feed=inputs,
71+
fetch_list=[self.predict_var.name])
6272

6373
return results
74+
75+
@contextlib.contextmanager
76+
def _prog_and_scope_guard(self):
77+
with framework.program_guard(main_program=self.inference_program):
78+
with executor.scope_guard(self.scope):
79+
yield

python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
9494
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
9595

9696
results = inferencer.infer({'x': tensor_x})
97-
print("infer results: ", results[0])
97+
print("infer results: ", numpy.array(results[0]))
9898

9999

100100
def main(use_cuda):

python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
112112

113113
results = inferencer.infer({'img': tensor_img})
114114

115-
print("infer results: ", results[0])
115+
print("infer results: ", numpy.array(results[0]))
116116

117117

118118
def main(use_cuda):

python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
9393

9494
results = inferencer.infer({'img': tensor_img})
9595

96-
print("infer results: ", results[0])
96+
print("infer results: ", numpy.array(results[0]))
9797

9898

9999
def main(use_cuda):

python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -127,14 +127,12 @@ def infer(use_cuda, inference_program, save_path):
127127
third_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
128128
fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
129129

130-
result = inferencer.infer(
131-
{
132-
'firstw': first_word,
133-
'secondw': second_word,
134-
'thirdw': third_word,
135-
'forthw': fourth_word
136-
},
137-
return_numpy=False)
130+
result = inferencer.infer({
131+
'firstw': first_word,
132+
'secondw': second_word,
133+
'thirdw': third_word,
134+
'forthw': fourth_word
135+
})
138136
print(np.array(result[0]))
139137

140138

python/paddle/fluid/trainer.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import contextlib
1516
import os
17+
1618
import core
17-
import framework
18-
import executor
19+
1920
import data_feeder
20-
import contextlib
21+
import executor
22+
import framework
2123
import io
22-
import unique_name
23-
import parallel_executor
24-
2524
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
2625
import optimizer as opt_module
26+
import parallel_executor
2727
from transpiler import distribute_transpiler
2828

2929
__all__ = [

0 commit comments

Comments
 (0)