
Commit 53f181d

add distill example in tutorial, and update augmentation condition (#619)
1 parent ee346c3 commit 53f181d

File tree

2 files changed: +9 −6 lines


education/day12.md

Lines changed: 1 addition & 1 deletion
@@ -99,4 +99,4 @@ from paddlenlp.transformers import ErnieGramForSequenceClassification, ErnieGram
 teacher = ErnieGramForSequenceClassification.from_pretrained("./tmp/ChnSentiCorp/best_model")
 ```
 
-The distillation process is the same as in the AI Studio tutorial, so it will not be repeated here; just follow the same steps as the tutorial.
+The distillation process is the same as in the AI Studio tutorial, so it will not be repeated here; just follow the same steps as the tutorial. In addition, this repo also provides a [BERT-to-Bi-LSTM distillation](../examples/model_compression/distill_lstm) example for reference.
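The linked distill_lstm example distills a fine-tuned teacher into a small Bi-LSTM student by fitting the teacher's output logits as soft labels. Below is a minimal sketch of such a distillation objective, assuming both models emit class logits; the function name `distillation_loss`, the `alpha` weight, and the MSE-on-logits choice follow the common task-specific distillation recipe and are illustrative, not the repo's exact implementation.

```python
import paddle
import paddle.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels, alpha=0.5):
    """Illustrative distillation objective: fit the teacher's logits
    (soft labels) while also fitting the gold labels (hard labels)."""
    # Soft-label term: regress the student's logits onto the teacher's.
    soft_loss = F.mse_loss(student_logits, teacher_logits)
    # Hard-label term: ordinary cross entropy against the dataset labels.
    hard_loss = F.cross_entropy(student_logits, labels)
    # alpha balances the two terms; 0.5 is an arbitrary illustrative value.
    return alpha * soft_loss + (1.0 - alpha) * hard_loss
```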

examples/model_compression/distill_lstm/data.py

Lines changed: 8 additions & 5 deletions
@@ -147,6 +147,8 @@ def apply_data_augmentation_for_cn(data,
     new_data = []
 
     for example in data:
+        if not example['text']:
+            continue
         text_tokenized = list(jieba.cut(example['text']))
         lstm_tokens = text_tokenized
         bert_tokens = tokenizer.tokenize(example['text'])
@@ -170,11 +172,12 @@ def apply_data_augmentation_for_cn(data,
                 p_ng, ngram_range)
             lstm_tokens, bert_tokens = flatten(lstm_tokens), flatten(
                 bert_tokens)
-        new_data.append({
-            "lstm_tokens": lstm_tokens,
-            "bert_tokens": bert_tokens,
-            "label": example['label']
-        })
+        if lstm_tokens and bert_tokens:
+            new_data.append({
+                "lstm_tokens": lstm_tokens,
+                "bert_tokens": bert_tokens,
+                "label": example['label']
+            })
     return new_data
 
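The two guards added above keep empty records out of the augmented dataset: an example with empty `text` is skipped before tokenization, and an augmented example is appended only when both token lists are non-empty. The standalone sketch below shows the same filtering behaviour; the name `filter_augmented_examples` and the whitespace split (standing in for jieba and the BERT tokenizer) are illustrative.

```python
def filter_augmented_examples(data):
    """Illustrative stand-in for the guarded loop in
    apply_data_augmentation_for_cn: skip empty inputs and drop
    examples whose token lists come out empty."""
    new_data = []
    for example in data:
        if not example['text']:        # empty text: nothing to tokenize
            continue
        lstm_tokens = example['text'].split()  # real code uses jieba.cut
        bert_tokens = example['text'].split()  # real code uses tokenizer.tokenize
        if lstm_tokens and bert_tokens:        # keep only non-empty results
            new_data.append({
                "lstm_tokens": lstm_tokens,
                "bert_tokens": bert_tokens,
                "label": example['label'],
            })
    return new_data

# Examples with empty text no longer produce empty training records.
print(filter_augmented_examples([{"text": "", "label": 0},
                                 {"text": "good movie", "label": 1}]))
```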
