@@ -1079,28 +1079,56 @@ def _post_process_(outputs, input_ids, cur_len, origin_len, scores, unfinished_f
         # make the shape of attention_mask = (-1, -1, -1, -1) in dy2static.
         model_kwargs["attention_mask"] = paddle.reshape(attn_mask, paddle.shape(attn_mask))
         model_kwargs["cache"] = outputs[1] if isinstance(outputs, tuple) else None
-        while cur_len < max_length:
-            # Note(GuoxiaWang): Remove outputs = _forward_(**model_kwargs)
-            # and change it to pass directly to _post_process_ to avoid
-            # closed-loop problem of dynamic-to-static model
-            input_ids, scores, unfinished_flag, model_kwargs = _post_process_(
-                _forward_(**model_kwargs),
-                input_ids,
-                cur_len_gpu,
-                origin_len_gpu,
-                scores,
-                unfinished_flag,
-                model_kwargs,
-            )
-            if not self.inference:
-                cur_len += 1
-            else:
-                # Note(ZhenyuLi): Avoid the synchronization caused by scale in dy2static
-                paddle.increment(cur_len)
-                paddle.increment(cur_len_gpu)
+        if hasattr(paddle.framework, "_no_check_dy2st_diff"):
+            # TODO(wanghuancoder): _no_check_dy2st_diff is used to turn off the checking of behavior
+            # inconsistency between dynamic graph and static graph. _no_check_dy2st_diff should be
+            # removed after static graphs support inplace and stride.
+            with paddle.framework._no_check_dy2st_diff():
+                while cur_len < max_length:
+                    # Note(GuoxiaWang): Remove outputs = _forward_(**model_kwargs)
+                    # and change it to pass directly to _post_process_ to avoid
+                    # closed-loop problem of dynamic-to-static model
+                    input_ids, scores, unfinished_flag, model_kwargs = _post_process_(
+                        _forward_(**model_kwargs),
+                        input_ids,
+                        cur_len_gpu,
+                        origin_len_gpu,
+                        scores,
+                        unfinished_flag,
+                        model_kwargs,
+                    )
+                    if not self.inference:
+                        cur_len += 1
+                    else:
+                        # Note(ZhenyuLi): Avoid the synchronization caused by scale in dy2static
+                        paddle.increment(cur_len)
+                        paddle.increment(cur_len_gpu)
+
+                    if not paddle.any(unfinished_flag):
+                        break
+        else:
+            while cur_len < max_length:
+                # Note(GuoxiaWang): Remove outputs = _forward_(**model_kwargs)
+                # and change it to pass directly to _post_process_ to avoid
+                # closed-loop problem of dynamic-to-static model
+                input_ids, scores, unfinished_flag, model_kwargs = _post_process_(
+                    _forward_(**model_kwargs),
+                    input_ids,
+                    cur_len_gpu,
+                    origin_len_gpu,
+                    scores,
+                    unfinished_flag,
+                    model_kwargs,
+                )
+                if not self.inference:
+                    cur_len += 1
+                else:
+                    # Note(ZhenyuLi): Avoid the synchronization caused by scale in dy2static
+                    paddle.increment(cur_len)
+                    paddle.increment(cur_len_gpu)
 
-            if not paddle.any(unfinished_flag):
-                break
+                if not paddle.any(unfinished_flag):
+                    break
 
         return model_kwargs["res"][:, origin_len:], scores
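
A quick sketch of the trick the `attention_mask` context line relies on (my reading of the comment, with a hypothetical `keep_dynamic` wrapper, not code from this PR): under `paddle.jit.to_static`, reshaping a tensor by its own runtime `paddle.shape(...)` tensor keeps every dimension dynamic (`-1`) in the converted graph, instead of baking in the tracing-time batch and sequence sizes.

```python
import paddle


@paddle.jit.to_static
def keep_dynamic(attn_mask):
    # paddle.shape() returns a runtime shape *tensor*, so dy2static records
    # the reshape target as (-1, -1, -1, -1) rather than concrete integers.
    return paddle.reshape(attn_mask, paddle.shape(attn_mask))
```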
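On older Paddle builds that predate `paddle.framework._no_check_dy2st_diff`, the `else` branch runs a second verbatim copy of the decoding loop. A minimal sketch of the same feature-detection pattern without the duplication, assuming `contextlib.nullcontext` as the no-op fallback (`_dy2st_diff_guard`, `decode_step`, and `generate_loop` are hypothetical names, not part of this PR):

```python
import contextlib

import paddle


def _dy2st_diff_guard():
    # Probe for the private API with hasattr(), exactly as the diff does,
    # and fall back to a no-op context manager when this Paddle lacks it.
    if hasattr(paddle.framework, "_no_check_dy2st_diff"):
        return paddle.framework._no_check_dy2st_diff()
    return contextlib.nullcontext()


def generate_loop(decode_step, cur_len, max_length):
    # One decoding loop instead of two copies; decode_step stands in for
    # the _forward_/_post_process_ pipeline above.
    with _dy2st_diff_guard():
        while cur_len < max_length:
            cur_len, finished = decode_step(cur_len)
            if finished:
                break
    return cur_len
```

Whether the duplicated-loop form was chosen for dy2static tracing reasons isn't stated in the PR, so treat this as a sketch rather than a drop-in refactor.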
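On the Note(ZhenyuLi) comments: as I read them, `cur_len += 1` lowers to a scale op whose scalar result the dy2static executor may synchronize on, whereas `paddle.increment` bumps the one-element tensor in place on device. A tiny illustration (the counter tensor here is hypothetical):

```python
import paddle

cur_len = paddle.to_tensor([0], dtype="int64")  # 1-element counter, as in the loop
paddle.increment(cur_len)  # in-place add of 1 -> tensor now holds [1]
print(cur_len.item())      # 1
```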