
Commit 9f8abe9

Fix several bugs in the Save and Infer stages (#95)
* fix setup
* fix bug for dssm reader
* fix net bug at PY3 for afm
* fix multi cards with files
* fix ctr
* add validation
* add validation
* add validation
* fix compile
* fix ci
* fix user define runner
* fix gnn reader at PY3
* fix fast yaml config at PY3

Co-authored-by: tangwei <[email protected]>
1 parent: c1af414

13 files changed: +257, -113 lines

core/trainers/framework/network.py

Lines changed: 5 additions & 0 deletions
@@ -94,6 +94,7 @@ def build_network(self, context):
             context["model"][model_dict["name"]]["model"] = model
             context["model"][model_dict["name"]][
                 "default_main_program"] = train_program.clone()
+            context["model"][model_dict["name"]]["compiled_program"] = None
 
         context["dataset"] = {}
         for dataset in context["env"]["dataset"]:
@@ -149,6 +150,7 @@ def build_network(self, context):
             context["model"][model_dict["name"]]["model"] = model
             context["model"][model_dict["name"]]["default_main_program"] = context[
                 "fleet"].main_program.clone()
+            context["model"][model_dict["name"]]["compiled_program"] = None
 
         if context["fleet"].is_server():
             self._server(context)
@@ -245,6 +247,8 @@ def build_network(self, context):
             context["model"][model_dict["name"]]["model"] = model
             context["model"][model_dict["name"]][
                 "default_main_program"] = train_program.clone()
+            context["model"][model_dict["name"]][
+                "compile_program"] = None
 
         if context["fleet"].is_server():
             self._server(context)
@@ -314,6 +318,7 @@ def build_network(self, context):
             context["model"][model_dict["name"]]["model"] = model
             context["model"][model_dict["name"]][
                 "default_main_program"] = train_program
+            context["model"][model_dict["name"]]["compiled_program"] = None
 
         context["dataset"] = {}
         for dataset in context["env"]["dataset"]:
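Each phase entry now reserves a compiled_program slot, initialized to None at network-build time, so the runner can compile the program once and reuse it on every later pass instead of recompiling per epoch (the fix behind "fix multi cards with files" and "fix compile"). A minimal sketch of that convention; the helper names build_model_entry and get_compiled_program are illustrative and not part of the repository:

def build_model_entry(model, train_program):
    """Assemble the per-model entry the trainer keeps in its context dict."""
    return {
        "model": model,
        "default_main_program": train_program,  # clone() in the real code
        "compiled_program": None,               # filled lazily by the runner
    }

def get_compiled_program(entry, compile_fn):
    """Compile on first use, then return the cached program on later calls."""
    if entry["compiled_program"] is None:
        entry["compiled_program"] = compile_fn(entry["default_main_program"])
    return entry["compiled_program"]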

core/trainers/framework/runner.py

Lines changed: 76 additions & 14 deletions
@@ -50,6 +50,7 @@ def _executor_dataset_train(self, model_dict, context):
         reader_name = model_dict["dataset_name"]
         model_name = model_dict["name"]
         model_class = context["model"][model_dict["name"]]["model"]
+
         fetch_vars = []
         fetch_alias = []
         fetch_period = int(
@@ -89,19 +90,7 @@ def _executor_dataset_train(self, model_dict, context):
     def _executor_dataloader_train(self, model_dict, context):
         model_name = model_dict["name"]
         model_class = context["model"][model_dict["name"]]["model"]
-
-        if context["is_infer"]:
-            program = context["model"][model_name]["main_program"]
-        elif context["is_fleet"]:
-            if context["fleet_mode"].upper() == "PS":
-                program = self._get_ps_program(model_dict, context)
-            elif context["fleet_mode"].upper() == "COLLECTIVE":
-                program = context["model"][model_name]["main_program"]
-        elif not context["is_fleet"]:
-            if context["device"].upper() == "CPU":
-                program = self._get_single_cpu_program(model_dict, context)
-            elif context["device"].upper() == "GPU":
-                program = self._get_single_gpu_program(model_dict, context)
+        program = self._get_dataloader_program(model_dict, context)
 
         reader_name = model_dict["dataset_name"]
         fetch_vars = []
@@ -143,6 +132,24 @@ def _executor_dataloader_train(self, model_dict, context):
         except fluid.core.EOFException:
             reader.reset()
 
+    def _get_dataloader_program(self, model_dict, context):
+        model_name = model_dict["name"]
+        if context["model"][model_name]["compiled_program"] == None:
+            if context["is_infer"]:
+                program = context["model"][model_name]["main_program"]
+            elif context["is_fleet"]:
+                if context["fleet_mode"].upper() == "PS":
+                    program = self._get_ps_program(model_dict, context)
+                elif context["fleet_mode"].upper() == "COLLECTIVE":
+                    program = context["model"][model_name]["main_program"]
+            elif not context["is_fleet"]:
+                if context["device"].upper() == "CPU":
+                    program = self._get_single_cpu_program(model_dict, context)
+                elif context["device"].upper() == "GPU":
+                    program = self._get_single_gpu_program(model_dict, context)
+            context["model"][model_name]["compiled_program"] = program
+        return context["model"][model_name]["compiled_program"]
+
     def _get_strategy(self, model_dict, context):
         _build_strategy = fluid.BuildStrategy()
         _exe_strategy = fluid.ExecutionStrategy()
@@ -218,12 +225,17 @@ def _get_ps_program(self, model_dict, context):
 
     def save(self, epoch_id, context, is_fleet=False):
         def need_save(epoch_id, epoch_interval, is_last=False):
+            name = "runner." + context["runner_name"] + "."
+            total_epoch = int(envs.get_global_env(name + "epochs", 1))
+            if epoch_id + 1 == total_epoch:
+                is_last = True
+
             if is_last:
                 return True
             if epoch_id == -1:
                 return False
 
-            return epoch_id % epoch_interval == 0
+            return (epoch_id + 1) % epoch_interval == 0
 
         def save_inference_model():
             name = "runner." + context["runner_name"] + "."
@@ -415,3 +427,53 @@ def run(self, context):
 
         """
        context["status"] = "terminal_pass"
+
+
+class SingleInferRunner(RunnerBase):
+    def __init__(self, context):
+        print("Running SingleInferRunner.")
+        pass
+
+    def run(self, context):
+        self._dir_check(context)
+
+        for index, epoch_name in enumerate(self.epoch_model_name_list):
+            for model_dict in context["phases"]:
+                self._load(context, model_dict,
+                           self.epoch_model_path_list[index])
+                begin_time = time.time()
+                self._run(context, model_dict)
+                end_time = time.time()
+                seconds = end_time - begin_time
+                print("Infer {} of {} done, use time: {}".format(model_dict[
+                    "name"], epoch_name, seconds))
+        context["status"] = "terminal_pass"
+
+    def _load(self, context, model_dict, model_path):
+        if model_path is None or model_path == "":
+            return
+        print("load persistables from", model_path)
+
+        with fluid.scope_guard(context["model"][model_dict["name"]]["scope"]):
+            train_prog = context["model"][model_dict["name"]]["main_program"]
+            startup_prog = context["model"][model_dict["name"]][
+                "startup_program"]
+            with fluid.program_guard(train_prog, startup_prog):
+                fluid.io.load_persistables(
+                    context["exe"], model_path, main_program=train_prog)
+
+    def _dir_check(self, context):
+        dirname = envs.get_global_env(
+            "runner." + context["runner_name"] + ".init_model_path", None)
+        self.epoch_model_path_list = []
+        self.epoch_model_name_list = []
+
+        for file in os.listdir(dirname):
+            file_path = os.path.join(dirname, file)
+            if os.path.isdir(file_path):
+                self.epoch_model_path_list.append(file_path)
+                self.epoch_model_name_list.append(file)
+
+        if len(self.epoch_model_path_list) == 0:
+            self.epoch_model_path_list.append(dirname)
+            self.epoch_model_name_list.append(dirname)
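Two behaviour changes stand out here: _get_dataloader_program now caches the compiled program in the slot reserved by network.py, and need_save reads the total epoch count from the runner config so the final epoch is always saved and the interval is counted against one-based epoch numbers. A standalone sketch of the corrected save schedule; total_epoch is a plain argument here, whereas the real runner reads it through envs.get_global_env:

def need_save(epoch_id, epoch_interval, total_epoch):
    """epoch_id counts from 0; save every epoch_interval epochs and on the last one."""
    if epoch_id + 1 == total_epoch:
        return True                      # always keep the final epoch
    if epoch_id == -1:
        return False
    return (epoch_id + 1) % epoch_interval == 0

# With 5 epochs and interval 2, epochs 1, 3 and 4 are saved (0-based ids);
# the old `epoch_id % epoch_interval` variant saved epoch 0 instead.
print([e for e in range(5) if need_save(e, 2, 5)])  # [1, 3, 4]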

core/trainers/framework/startup.py

Lines changed: 18 additions & 0 deletions
@@ -101,3 +101,21 @@ def startup(self, context):
                     context["exe"].run(startup_prog)
                 self.load(context, True)
         context["status"] = "train_pass"
+
+
+class SingleInferStartup(StartupBase):
+    def __init__(self, context):
+        print("Running SingleInferStartup.")
+        pass
+
+    def startup(self, context):
+        for model_dict in context["phases"]:
+            with fluid.scope_guard(context["model"][model_dict["name"]][
+                    "scope"]):
+                train_prog = context["model"][model_dict["name"]][
+                    "main_program"]
+                startup_prog = context["model"][model_dict["name"]][
+                    "startup_program"]
+                with fluid.program_guard(train_prog, startup_prog):
+                    context["exe"].run(startup_prog)
+        context["status"] = "train_pass"

core/trainers/general_trainer.py

Lines changed: 7 additions & 3 deletions
@@ -101,7 +101,9 @@ def startup(self, context):
             startup_class = envs.lazy_instance_by_fliename(startup_class_path,
                                                            "Startup")(context)
         else:
-            if self.engine == EngineMode.SINGLE:
+            if self.engine == EngineMode.SINGLE and context["is_infer"]:
+                startup_class_name = "SingleInferStartup"
+            elif self.engine == EngineMode.SINGLE and not context["is_infer"]:
                 startup_class_name = "SingleStartup"
             elif self.fleet_mode == FleetMode.PS or self.fleet_mode == FleetMode.PSLIB:
                 startup_class_name = "PSStartup"
@@ -117,12 +119,14 @@ def startup(self, context):
 
     def runner(self, context):
         runner_class_path = envs.get_global_env(
-            self.runner_env_name + ".runner_class_paht", default_value=None)
+            self.runner_env_name + ".runner_class_path", default_value=None)
         if runner_class_path:
             runner_class = envs.lazy_instance_by_fliename(runner_class_path,
                                                           "Runner")(context)
         else:
-            if self.engine == EngineMode.SINGLE:
+            if self.engine == EngineMode.SINGLE and context["is_infer"]:
+                runner_class_name = "SingleInferRunner"
+            elif self.engine == EngineMode.SINGLE and not context["is_infer"]:
                 runner_class_name = "SingleRunner"
             elif self.fleet_mode == FleetMode.PSLIB:
                 runner_class_name = "PslibRunner"
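With these branches, a single-card job dispatches to the new infer classes whenever is_infer is set; the second hunk also fixes the runner_class_paht typo, so a user-defined runner_class_path in the YAML is actually honoured ("fix user define runner"). A compact, hedged restatement of the selection logic; only the branches visible in this diff are covered, and plain strings stand in for the EngineMode/FleetMode enums:

def pick_runner_class(engine, fleet_mode, is_infer):
    # Mirrors the fixed branch order in general_trainer.runner().
    if engine == "SINGLE" and is_infer:
        return "SingleInferRunner"
    if engine == "SINGLE":
        return "SingleRunner"
    if fleet_mode == "PSLIB":
        return "PslibRunner"
    raise ValueError("combination not shown in this diff")

assert pick_runner_class("SINGLE", None, is_infer=True) == "SingleInferRunner"
assert pick_runner_class("SINGLE", None, is_infer=False) == "SingleRunner"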

core/utils/validation.py

Lines changed: 54 additions & 29 deletions
@@ -16,38 +16,47 @@
 
 
 class ValueFormat:
-    def __init__(self, value_type, value, value_handler):
+    def __init__(self, value_type, value, value_handler, required=False):
         self.value_type = value_type
-        self.value = value
         self.value_handler = value_handler
+        self.value = value
+        self.required = required
 
     def is_valid(self, name, value):
-        ret = self.is_type_valid(name, value)
+
+        if not self.value_type:
+            ret = True
+        else:
+            ret = self.is_type_valid(name, value)
+
         if not ret:
             return ret
 
+        if not self.value or not self.value_handler:
+            return True
+
         ret = self.is_value_valid(name, value)
         return ret
 
     def is_type_valid(self, name, value):
        if self.value_type == "int":
            if not isinstance(value, int):
                print("\nattr {} should be int, but {} now\n".format(
-                    name, self.value_type))
+                    name, type(value)))
                return False
            return True
 
        elif self.value_type == "str":
            if not isinstance(value, str):
                print("\nattr {} should be str, but {} now\n".format(
-                    name, self.value_type))
+                    name, type(value)))
                return False
            return True
 
        elif self.value_type == "strs":
            if not isinstance(value, list):
                print("\nattr {} should be list(str), but {} now\n".format(
-                    name, self.value_type))
+                    name, type(value)))
                return False
            for v in value:
                if not isinstance(v, str):
@@ -56,10 +65,29 @@ def is_type_valid(self, name, value):
                     return False
             return True
 
+        elif self.value_type == "dict":
+            if not isinstance(value, dict):
+                print("\nattr {} should be str, but {} now\n".format(
+                    name, type(value)))
+                return False
+            return True
+
+        elif self.value_type == "dicts":
+            if not isinstance(value, list):
+                print("\nattr {} should be list(dist), but {} now\n".format(
+                    name, type(value)))
+                return False
+            for v in value:
+                if not isinstance(v, dict):
+                    print("\nattr {} should be list(dist), but list({}) now\n".
+                          format(name, type(v)))
+                    return False
+            return True
+
         elif self.value_type == "ints":
             if not isinstance(value, list):
                 print("\nattr {} should be list(int), but {} now\n".format(
-                    name, self.value_type))
+                    name, type(value)))
                 return False
             for v in value:
                 if not isinstance(v, int):
@@ -74,7 +102,7 @@ def is_type_valid(self, name, value):
             return False
 
     def is_value_valid(self, name, value):
-        ret = self.value_handler(value)
+        ret = self.value_handler(name, value, self.value)
         return ret
 
 
@@ -112,38 +140,35 @@ def le_value_handler(name, value, values):
 
 def register():
     validations = {}
-    validations["train.workspace"] = ValueFormat("str", None, eq_value_handler)
-    validations["train.device"] = ValueFormat("str", ["cpu", "gpu"],
-                                              in_value_handler)
-    validations["train.epochs"] = ValueFormat("int", 1, ge_value_handler)
-    validations["train.engine"] = ValueFormat(
-        "str", ["train", "infer", "local_cluster_train", "cluster_train"],
-        in_value_handler)
-
-    requires = ["workspace", "dataset", "mode", "runner", "phase"]
-    return validations, requires
+    validations["workspace"] = ValueFormat("str", None, None, True)
+    validations["mode"] = ValueFormat(None, None, None, True)
+    validations["runner"] = ValueFormat("dicts", None, None, True)
+    validations["phase"] = ValueFormat("dicts", None, None, True)
+    validations["hyper_parameters"] = ValueFormat("dict", None, None, False)
+    return validations
 
 
 def yaml_validation(config):
-    all_checkers, require_checkers = register()
+    all_checkers = register()
+
+    require_checkers = []
+    for name, checker in all_checkers.items():
+        if checker.required:
+            require_checkers.append(name)
 
     _config = envs.load_yaml(config)
-    flattens = envs.flatten_environs(_config)
 
     for required in require_checkers:
-        if required not in flattens.keys():
+        if required not in _config.keys():
             print("\ncan not find {} in yaml, which is required\n".format(
                 required))
             return False
 
-    for name, flatten in flattens.items():
+    for name, value in _config.items():
         checker = all_checkers.get(name, None)
-
-        if not checker:
-            continue
-
-        ret = checker.is_valid(name, flattens)
-        if not ret:
-            return False
+        if checker:
+            ret = checker.is_valid(name, value)
+            if not ret:
+                return False
 
     return True
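The validator now works on the raw YAML dict rather than on flattened keys: register() marks workspace, mode, runner and phase as required, runner and phase must be lists of dicts, hyper_parameters a dict, and any key without a checker is simply skipped. An illustrative walk-through of the same idea with a plain dict standing in for envs.load_yaml(config); the key names follow register(), while the sample values are made up:

REQUIRED = {"workspace", "mode", "runner", "phase"}
TYPES = {"workspace": str, "runner": list, "phase": list, "hyper_parameters": dict}

def validate(config):
    missing = REQUIRED - config.keys()
    if missing:
        print("can not find {} in yaml, which is required".format(sorted(missing)))
        return False
    for name, expected in TYPES.items():
        if name in config and not isinstance(config[name], expected):
            print("attr {} should be {}, but {} now".format(
                name, expected.__name__, type(config[name])))
            return False
    return True

config = {
    "workspace": "models/rank/afm",
    "mode": "single_cpu_train",
    "runner": [{"name": "single_cpu_train", "class": "train"}],
    "phase": [{"name": "phase1", "model": "{workspace}/model.py"}],
}
print(validate(config))  # True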

models/match/dssm/synthetic_evaluate_reader.py

Lines changed: 2 additions & 2 deletions
@@ -30,8 +30,8 @@ def reader():
             This function needs to be implemented by the user, based on data format
             """
             features = line.rstrip('\n').split('\t')
-            query = map(float, features[0].split(','))
-            pos_doc = map(float, features[1].split(','))
+            query = [float(feature) for feature in features[0].split(',')]
+            pos_doc = [float(feature) for feature in features[1].split(',')]
             feature_names = ['query', 'doc_pos']
 
             yield zip(feature_names, [query] + [pos_doc])
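This change, and the identical one in synthetic_reader.py just below, addresses a Python 3 behaviour: map() returns a lazy, single-use iterator rather than a list, so the values vanish once the reader pipeline consumes them. A small demonstration of the difference on a made-up input line:

line = "0.1,0.2,0.3\t0.4,0.5,0.6"
features = line.rstrip('\n').split('\t')

query_lazy = map(float, features[0].split(','))           # Python 3: a one-shot iterator
query_list = [float(f) for f in features[0].split(',')]   # the fix: a real list

print(list(query_lazy))             # [0.1, 0.2, 0.3]  (first and only read)
print(list(query_lazy))             # []               (the iterator is exhausted)
print(query_list, len(query_list))  # [0.1, 0.2, 0.3] 3 (safe to reuse)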

models/match/dssm/synthetic_reader.py

Lines changed: 5 additions & 3 deletions
@@ -31,13 +31,15 @@ def reader():
             This function needs to be implemented by the user, based on data format
             """
             features = line.rstrip('\n').split('\t')
-            query = map(float, features[0].split(','))
-            pos_doc = map(float, features[1].split(','))
+            query = [float(feature) for feature in features[0].split(',')]
+            pos_doc = [float(feature) for feature in features[1].split(',')]
             feature_names = ['query', 'doc_pos']
             neg_docs = []
             for i in range(len(features) - 2):
                 feature_names.append('doc_neg_' + str(i))
-                neg_docs.append(map(float, features[i + 2].split(',')))
+                neg_docs.append([
+                    float(feature) for feature in features[i + 2].split(',')
+                ])
 
             yield zip(feature_names, [query] + [pos_doc] + neg_docs)
models/rank/afm/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def net(self, inputs, is_infer=False):
133133
attention_h) # (batch_size * (num_field*(num_field-1)/2)) * 1
134134
attention_out = fluid.layers.softmax(
135135
attention_out) # (batch_size * (num_field*(num_field-1)/2)) * 1
136-
num_interactions = self.num_field * (self.num_field - 1) / 2
136+
num_interactions = int(self.num_field * (self.num_field - 1) / 2)
137137
attention_out = fluid.layers.reshape(
138138
attention_out,
139139
shape=[-1, num_interactions,
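The one-line change works around Python 3 true division: num_field * (num_field - 1) / 2 evaluates to a float, and a float is not a valid entry in a reshape target shape. A quick arithmetic check of the two variants:

num_field = 10
num_interactions_py3 = num_field * (num_field - 1) / 2       # 45.0, a float under Python 3
num_interactions_fix = int(num_field * (num_field - 1) / 2)  # 45, usable as a shape entry
assert num_interactions_fix == num_field * (num_field - 1) // 2
print(type(num_interactions_py3), type(num_interactions_fix))  # <class 'float'> <class 'int'>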
