Skip to content

Commit ddf6ec2

Browse files
authored
fix demo (#213)
* fix demo * fix * fix * fix code style
1 parent b1f708f commit ddf6ec2

File tree

14 files changed

+342
-111
lines changed

14 files changed

+342
-111
lines changed

core/trainers/framework/runner.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import warnings
2020
import numpy as np
2121
import random
22+
import json
2223
import logging
2324
import paddle.fluid as fluid
2425

@@ -147,17 +148,22 @@ def _executor_dataloader_train(self, model_dict, context):
147148
metrics_format = []
148149

149150
if context["is_infer"]:
150-
metrics_format.append("\t[Infer]\t{}: {{}}".format("batch"))
151+
metrics_format.append("\t[Infer] {}: {{}}".format("batch"))
151152
else:
152-
metrics_format.append("\t[Train]\t{}: {{}}".format("batch"))
153+
metrics_format.append("\t[Train]")
154+
if "current_epoch" in context:
155+
metrics_format.append(" epoch: {}".format(context[
156+
"current_epoch"]))
157+
metrics_format.append(" {}: {{}}".format("batch"))
153158

154159
metrics_format.append("{}: {{:.2f}}s".format("time_each_interval"))
155160

156161
metrics_names = ["total_batch"]
157-
162+
metrics_indexes = dict()
158163
for name, var in metrics.items():
159164
metrics_names.append(name)
160165
metrics_varnames.append(var.name)
166+
metrics_indexes[var.name] = len(metrics_varnames) - 1
161167
metrics_format.append("{}: {{}}".format(name))
162168
metrics_format = ", ".join(metrics_format)
163169

@@ -166,6 +172,7 @@ def _executor_dataloader_train(self, model_dict, context):
166172
batch_id = 0
167173
begin_time = time.time()
168174
scope = context["model"][model_name]["scope"]
175+
runner_results = []
169176
result = None
170177
with fluid.scope_guard(scope):
171178
try:
@@ -182,18 +189,35 @@ def _executor_dataloader_train(self, model_dict, context):
182189
]
183190
metrics.extend(metrics_rets)
184191

192+
batch_runner_result = {}
193+
for k, v in metrics_indexes.items():
194+
batch_runner_result[k] = np.array(metrics_rets[
195+
v]).tolist()
196+
runner_results.append(batch_runner_result)
197+
185198
if batch_id % fetch_period == 0 and batch_id != 0:
186199
end_time = time.time()
187200
seconds = end_time - begin_time
188201
metrics_logging = metrics[:]
189202
metrics_logging = metrics.insert(1, seconds)
190203
begin_time = end_time
191-
192204
logging.info(metrics_format.format(*metrics))
193205
batch_id += 1
194206
except fluid.core.EOFException:
195207
reader.reset()
196208

209+
runner_result_save_path = envs.get_global_env(
210+
"runner." + context["runner_name"] + ".runner_result_dump_path",
211+
None)
212+
if runner_result_save_path:
213+
if "current_epoch" in context:
214+
runner_result_save_path = runner_result_save_path + "_epoch_{}".format(
215+
context["current_epoch"])
216+
logging.info("Dump runner result in {}".format(
217+
runner_result_save_path))
218+
with open(runner_result_save_path, 'w+') as fout:
219+
json.dump(runner_results, fout)
220+
197221
if batch_id > 0:
198222
result = dict(zip(metrics_names, metrics))
199223
return result
@@ -402,6 +426,7 @@ def run(self, context):
402426
filelist = context["file_list"]
403427
context["file_list"] = shuffle_files(need_shuffle_files,
404428
filelist)
429+
context["current_epoch"] = epoch
405430
begin_time = time.time()
406431
result = self._run(context, model_dict)
407432
end_time = time.time()
@@ -450,6 +475,7 @@ def run(self, context):
450475
filelist = context["file_list"]
451476
context["file_list"] = shuffle_files(need_shuffle_files,
452477
filelist)
478+
context["current_epoch"] = epoch
453479
begin_time = time.time()
454480
result = self._run(context, model_dict)
455481
end_time = time.time()
@@ -500,6 +526,7 @@ def run(self, context):
500526
filelist = context["file_list"]
501527
context["file_list"] = shuffle_files(need_shuffle_files,
502528
filelist)
529+
context["current_epoch"] = epoch
503530
begin_time = time.time()
504531
self._run(context, model_dict)
505532
end_time = time.time()
@@ -533,6 +560,7 @@ def run(self, context):
533560
filelist = context["file_list"]
534561
context["file_list"] = shuffle_files(need_shuffle_files,
535562
filelist)
563+
context["current_epoch"] = epoch
536564
begin_time = time.time()
537565
self._run(context, model_dict)
538566
end_time = time.time()

doc/yaml.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
| runner_class_path | string | 路径 || 自定义runner流程实现的地址 |
3939
| terminal_class_path | string | 路径 || 自定义terminal流程实现的地址 |
4040
| init_pretraining_model_path | string | 路径 ||自定义的startup流程中需要传入这个参数,finetune中需要加载的参数的地址 |
41+
| runner_result_dump_path | string | 路径 || 运行中metrics的结果使用json.dump到文件的地址,若是在训练的runner中使用, 会自动加上epoch后缀 |
4142

4243

4344

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# PaddleRec 基于 Movielens 数据集的全流程示例
2+
3+
## 模型的详细教程可以查阅: [十分钟!全流程!从零搭建推荐系统](https://aistudio.baidu.com/aistudio/projectdetail/559336)
4+
5+
## 本地运行流程
6+
7+
在本地需要安装`PaddleRec`与`PaddlePaddle`,推荐在`Linux` + `python2.7` 环境下执行此demo
8+
9+
本地运行流程与AiStudio流程基本一致,细节略有区别
10+
11+
### 离线训练
12+
```shell
13+
sh train.sh
14+
```
15+
16+
### 离线测试
17+
```shell
18+
sh offline_test.sh
19+
```
20+
21+
### 模拟在线召回
22+
```shell
23+
sh online_recall.sh
24+
```
25+
26+
### 模拟在线排序
27+
```shell
28+
sh online_rank.sh
29+
```
Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
cd data
22

3+
echo "---> Download movielens 1M data ..."
34
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
5+
echo "---> Unzip ml-1m.zip ..."
46
unzip ml-1m.zip
7+
rm ml-1m.zip
58

9+
echo "---> Split movielens data ..."
610
python split.py
711

8-
mkdir train/
9-
mkdir test/
12+
mkdir -p train/
13+
mkdir -p test/
1014

15+
echo "---> Process train & test data ..."
1116
python process_ml_1m.py process_raw ./ml-1m/train.dat | sort -t $'\t' -k 9 -n > log.data.train
1217
python process_ml_1m.py process_raw ./ml-1m/test.dat | sort -t $'\t' -k 9 -n > log.data.test
1318
python process_ml_1m.py hash log.data.train > ./train/data.txt
1419
python process_ml_1m.py hash log.data.test > ./test/data.txt
1520

1621
rm log.data.train
1722
rm log.data.test
18-
cd ../
23+
cd ..
24+
25+
echo "---> Finish data process"
Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
## modify config.yaml to infer mode at first
22

3-
cd recall
4-
python -m paddlerec.run -m ./config.yaml
5-
cd ../rank
6-
python -m paddlerec.run -m ./config.yaml
7-
cd ..
3+
echo "Recall offline test ..."
4+
echo "Model config at models/demo/movie_recommand/recall/config_test_offline.yaml"
5+
python -m paddlerec.run -m ./recall/config_test_offline.yaml
6+
7+
echo "Rank offline test ..."
8+
echo "Model config at models/demo/movie_recommand/rank/config_test_offline.yaml"
9+
python -m paddlerec.run -m ./rank/config_test_offline.yaml
810

911
echo "recall offline test result:"
1012
python parse.py recall_offline recall/infer_result
13+
1114
echo "rank offline test result:"
1215
python parse.py rank_offline rank/infer_result
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
cd data
2+
echo "Create online test data ..."
23
python process_ml_1m.py data_rank > online_user/test/data.txt
34

4-
## modify recall/config.yaml to online_infer mode
5-
cd ../rank
6-
python -m paddlerec.run -m ./config.yaml
7-
cd ../
8-
python parse.py rank_online rank/infer_result
5+
cd ..
6+
echo "Rank online test ..."
7+
echo "Model config at models/demo/movie_recommand/rank/config_test_online.yaml"
8+
python -m paddlerec.run -m ./rank/config_test_online.yaml
9+
python parse.py rank_online ./rank/infer_result
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
cd data
2+
echo "Create online test data ..."
23
mkdir online_user/test
34
python process_ml_1m.py data_recall > online_user/test/data.txt
45

5-
## modify recall/config.yaml to online_infer mode
6-
cd ../recall
7-
python -m paddlerec.run -m ./config.yaml
8-
cd ../
6+
cd ..
7+
echo "Recall online test ..."
8+
echo "Model config at models/demo/movie_recommand/recall/config_test_online.yaml"
9+
python -m paddlerec.run -m ./recall/config_test_online.yaml
910
python parse.py recall_online recall/infer_result

models/demo/movie_recommand/rank/config.yaml

Lines changed: 5 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,16 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
workspace: "models/demo/movie_recommand"
15+
workspace: "./"
1616

1717
# list of dataset
1818
dataset:
1919
- name: dataset_train # name of dataset to distinguish different datasets
2020
batch_size: 128
21-
type: QueueDataset
21+
type: DataLoader
2222
data_path: "{workspace}/data/train"
2323
sparse_slots: "logid time userid gender age occupation movieid title genres label"
2424
dense_slots: ""
25-
- name: dataset_infer # name
26-
batch_size: 128
27-
type: DataLoader
28-
data_path: "{workspace}/data/test"
29-
sparse_slots: "logid time userid gender age occupation movieid title genres label"
30-
dense_slots: ""
31-
- name: dataset_online_infer # name
32-
batch_size: 10
33-
type: DataLoader
34-
data_path: "{workspace}/data/online_user/test"
35-
sparse_slots: "logid time userid gender age occupation movieid title genres label"
36-
dense_slots: ""
3725

3826
# hyper parameters of user-defined network
3927
hyper_parameters:
@@ -51,42 +39,17 @@ hyper_parameters:
5139
# train
5240
mode: runner_train
5341

54-
## online or offline infer
55-
#mode: runner_infer
5642
runner:
5743
- name: runner_train
5844
class: train
5945
save_checkpoint_interval: 1 # save model interval of epochs
60-
save_inference_interval: 1 # save inference
61-
save_checkpoint_path: "increment" # save checkpoint path
62-
save_inference_path: "inference" # save inference path
46+
save_checkpoint_path: "increment_rank" # save checkpoint path
6347
epochs: 10
6448
device: cpu
6549

66-
- name: runner_infer
67-
class: infer
68-
print_interval: 10000
69-
init_model_path: "increment/9" # load model path
70-
7150
#train
7251
phase:
7352
- name: phase1
74-
model: "{workspace}/model.py" # user-defined model
53+
model: "{workspace}/rank/model.py" # user-defined model
7554
dataset_name: dataset_train # select dataset by name
76-
thread_num: 12
77-
78-
##offline infer
79-
#phase:
80-
#- name: phase1
81-
# model: "{workspace}/model.py" # user-defined model
82-
# dataset_name: dataset_infer # select dataset by name
83-
# save_path: "./infer_result"
84-
# thread_num: 1
85-
86-
##offline infer
87-
#phase:
88-
#- name: phase1
89-
# model: "{workspace}/model.py" # user-defined model
90-
# dataset_name: dataset_online_infer # select dataset by name
91-
# save_path: "./infer_result"
92-
# thread_num: 1
55+
thread_num: 4
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
#workspace: "paddlerec.models.demo.movie_recommand"
16+
workspace: "./"
17+
18+
# list of dataset
19+
dataset:
20+
- name: dataset_infer # name
21+
batch_size: 128
22+
type: DataLoader
23+
data_path: "{workspace}/data/test"
24+
sparse_slots: "logid time userid gender age occupation movieid title genres label"
25+
dense_slots: ""
26+
27+
# hyper parameters of user-defined network
28+
hyper_parameters:
29+
# optimizer config
30+
optimizer:
31+
class: Adam
32+
learning_rate: 0.001
33+
strategy: async
34+
# user-defined <key, value> pairs
35+
sparse_feature_number: 60000000
36+
sparse_feature_dim: 9
37+
dense_input_dim: 13
38+
fc_sizes: [512, 256, 128, 32]
39+
40+
# train
41+
mode: runner_infer
42+
43+
## online or offline infer
44+
#mode: runner_infer
45+
runner:
46+
- name: runner_infer
47+
epochs: 1
48+
device: cpu
49+
class: infer
50+
print_interval: 10000
51+
runner_result_dump_path: "{workspace}/rank/infer_result"
52+
init_model_path: "increment_rank/9" # load model path
53+
54+
#offline infer
55+
phase:
56+
- name: phase1
57+
model: "{workspace}/rank/model.py" # user-defined model
58+
dataset_name: dataset_infer # select dataset by name
59+
thread_num: 1
60+

0 commit comments

Comments
 (0)