 import paddle
 import paddle.fluid as fluid
-import numpy
+import numpy as np
+import math  # assumed needed for math.isnan below; lines 1-15 of the file are not shown
+import sys   # assumed needed for sys.exit below

 WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
 WORD_DICT_LEN = len(WORD_DICT)
 LABEL_DICT_LEN = len(LABEL_DICT)
 PRED_DICT_LEN = len(VERB_DICT)
 MARK_DICT_LEN = 2
+IS_SPARSE = True
+BATCH_SIZE = 10
+EMBEDDING_NAME = 'emb'
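For orientation: `paddle.dataset.conll05.get_dict()` loads the word, verb (predicate), and label vocabularies of the CoNLL-2005 SRL dataset, and `MARK_DICT_LEN` is 2 because the mark feature is a binary flag for whether a token lies in the predicate's context region. A quick, illustrative sanity check of the sizes these constants feed into the layers below (not part of the commit):

    # Illustrative only: the vocabulary sizes that size the embeddings below.
    print("words=%d verbs=%d labels=%d" %
          (WORD_DICT_LEN, PRED_DICT_LEN, LABEL_DICT_LEN))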


-def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
+def lstm_net():
     WORD_DIM = 32
     MARK_DIM = 5
     HIDDEN_DIM = 512
     DEPTH = 8
-    EMBEDDING_NAME = 'emb'

     # Data definitions
     word = fluid.layers.data(
@@ -69,8 +71,9 @@ def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
         fluid.layers.embedding(
             size=[WORD_DICT_LEN, WORD_DIM],
             input=x,
-            param_attr=fluid.ParamAttr(
-                name=EMBEDDING_NAME, trainable=False)) for x in word_input
+            param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))
+        for x in word_input
+        #name=EMBEDDING_NAME, trainable=False)) for x in word_input
     ]
     emb_layers.append(predicate_embedding)
     emb_layers.append(mark_embedding)
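All of the word and context inputs are embedded through a single shared table: each `fluid.layers.embedding` call points `fluid.ParamAttr` at the same name, `EMBEDDING_NAME`, so fluid creates the `emb` parameter once and reuses it. A minimal sketch of that sharing pattern, with hypothetical inputs `a` and `b`:

    # Sketch: two lookups that name the same parameter share one table.
    a = fluid.layers.data(name='a', shape=[1], dtype='int64', lod_level=1)
    b = fluid.layers.data(name='b', shape=[1], dtype='int64', lod_level=1)
    emb_a = fluid.layers.embedding(
        input=a, size=[WORD_DICT_LEN, WORD_DIM],
        param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))
    emb_b = fluid.layers.embedding(
        input=b, size=[WORD_DICT_LEN, WORD_DIM],
        param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))  # reuses 'emb'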
@@ -116,21 +119,16 @@ def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
     return feature_out


-def inference_network():
-    predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-                       mark)
+def inference_program():
+    predict = lstm_net()

-    crf_decode = fluid.layers.crf_decoding(
-        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
+    return predict

-    return crf_decode

-
-def train_network():
+def train_program():
     MIX_HIDDEN_LR = 1e-3

-    predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-                       mark)
+    predict = lstm_net()
     target = fluid.layers.data(
         name='target', shape=[1], dtype='int64', lod_level=1)
     crf_cost = fluid.layers.linear_chain_crf(
@@ -140,44 +138,66 @@ def train_network():
             name='crfw', learning_rate=MIX_HIDDEN_LR))
     avg_cost = fluid.layers.mean(crf_cost)

-    return avg_cost
+    return [avg_cost]
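Note the asymmetry this rewrite introduces: `train_program` scores sequences with `linear_chain_crf` against the gold `target`, while `inference_program` now returns the raw emission `feature_out` instead of a decoded label path. To get decoded labels at inference time, the removed `crf_decoding` call could be reattached to the same learned `'crfw'` transitions, roughly:

    # Sketch: Viterbi decoding over the emissions, reusing the 'crfw'
    # transition parameters learned by linear_chain_crf during training.
    crf_decode = fluid.layers.crf_decoding(
        input=predict, param_attr=fluid.ParamAttr(name='crfw'))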


-def train(use_cuda, save_path):
-    BATCH_SIZE = 128
-    EPOCH_NUM = 1
+def train(use_cuda, train_program, save_path):
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    optimizer = fluid.optimizer.SGD(learning_rate=0.01)

-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.conll05.train(), buf_size=8192),
-        batch_size=BATCH_SIZE)
-    test_reader = paddle.batch(
-        paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
+    trainer = fluid.Trainer(
+        train_func=train_program, place=place, optimizer=optimizer)

-    def event_handler(event):
-        if isinstance(event, fluid.EndIteration):
-            if (event.batch_id % 10) == 0:
-                avg_cost = trainer.test(reader=test_reader)
+    feed_order = [
+        'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
+        'ctx_p2_data', 'verb_data', 'mark_data', 'target'
+    ]

-                print('BatchID {0:04}, Loss {1:2.2}'.format(event.batch_id + 1,
-                                                            avg_cost))
+    #embedding_param = fluid.global_scope().find_var(
+    #    EMBEDDING_NAME).get_tensor()
+    #embedding_param.set(
+    #    load_parameter(conll05.get_embedding(), WORD_DICT_LEN, WORD_DIM),
+    #    place)

-                if avg_cost > 0.01:  # Low threshold for speeding up CI
-                    trainer.save_params(save_path)
-                    return
+    def event_handler(event):
+        if isinstance(event, fluid.EndEpochEvent):
+            test_reader = paddle.batch(
+                paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
+            avg_cost_set = trainer.test(
+                reader=test_reader, feed_order=feed_order)
+
+            # get avg cost
+            avg_cost = np.array(avg_cost_set).mean()
+
+            print("avg_cost: %s" % avg_cost)
+
+            if float(avg_cost) < 100.0:  # Large value to increase CI speed
+                trainer.save_params(save_path)
+            else:
+                print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
+                                                              float(avg_cost)))
+                if math.isnan(float(avg_cost)):
+                    sys.exit("got NaN loss, training failed.")
+
+        elif isinstance(event, fluid.EndStepEvent):
+            print("Step {0}, Epoch {1} Metrics {2}".format(
+                event.step, event.epoch, map(np.array, event.metrics)))
+            if event.step == 1:  # Run 2 iterations to speed CI
+                trainer.save_params(save_path)
+                trainer.stop()
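For readers new to fluid's high-level API: `fluid.Trainer` calls `event_handler` with a `fluid.EndStepEvent` after every mini-batch and a `fluid.EndEpochEvent` after every full pass, which is why the handler above logs step metrics frequently but only runs the test reader once per epoch. The callback contract, as a minimal sketch:

    # Minimal sketch of the callback contract assumed by trainer.train() below.
    def minimal_handler(event):
        if isinstance(event, fluid.EndStepEvent):
            pass  # per-batch work: event.step, event.epoch, event.metrics
        elif isinstance(event, fluid.EndEpochEvent):
            pass  # per-pass work: evaluate, save_params, or trainer.stop()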

-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    sgd_optimizer = fluid.optimizer.SGD(
-        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=0.01,
-            decay_steps=100000,
-            decay_rate=0.5,
-            staircase=True))
-    trainer = fluid.Trainer(train_network, optimizer=sgd_optimizer, place=place)
-    trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.conll05.test(), buf_size=8192),
+        batch_size=BATCH_SIZE)
+    trainer.train(
+        num_epochs=1,
+        event_handler=event_handler,
+        reader=train_reader,
+        feed_order=feed_order)
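Note that the shuffled training reader above draws from `paddle.dataset.conll05.test()` rather than the train split, presumably another of this file's CI shortcuts. A full training run would swap in the train split, along the lines of the reader this commit removed:

    # Presumed full-training variant (assumption; the commit itself reads
    # the test split, apparently to keep CI fast).
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.train(), buf_size=8192),
        batch_size=BATCH_SIZE)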


-def infer(use_cuda, save_path):
+def infer(use_cuda, inference_program, save_path):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     inferencer = fluid.Inferencer(
         inference_program, param_path=save_path, place=place)
@@ -201,26 +221,28 @@ def create_random_lodtensor(lod, place, low, high):
     ctx_p2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
     mark = create_random_lodtensor(lod, place, low=0, high=MARK_DICT_LEN - 1)

-    results = inferencer.infer({
-        'word_data': word,
-        'verb_data': pred,
-        'ctx_n2_data': ctx_n2,
-        'ctx_n1_data': ctx_n1,
-        'ctx_0_data': ctx_0,
-        'ctx_p1_data': ctx_p1,
-        'ctx_p2_data': ctx_p2,
-        'mark_data': mark
-    })
+    results = inferencer.infer(
+        {
+            'word_data': word,
+            'verb_data': pred,
+            'ctx_n2_data': ctx_n2,
+            'ctx_n1_data': ctx_n1,
+            'ctx_0_data': ctx_0,
+            'ctx_p1_data': ctx_p1,
+            'ctx_p2_data': ctx_p2,
+            'mark_data': mark
+        },
+        return_numpy=False)

-    print("infer results: ", results)
+    print("infer results: ", np.array(results[0]))


 def main(use_cuda):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     save_path = "label_semantic_roles.inference.model"
-    train(use_cuda, save_path)
-    infer(use_cuda, save_path)
+    train(use_cuda, train_program, save_path)
+    infer(use_cuda, inference_program, save_path)


 if __name__ == '__main__':