Commit 92f1fb7

[bugfix] fix amp bugs and remove topo sort in base/backward.py (#60039)
* fix and remove topo order effect (#59996)
* [AMP/SOT/PIR] fix amp bugs in yolo_v5 and add unittest (#59896)
* fix amp bugs in yolo_v5 and add unittest
* add bf16
* fix-amp-bugs
* Update fp16_utils.py
1 parent 1368016 · commit 92f1fb7

3 files changed: 85 additions & 12 deletions

python/paddle/base/backward.py

Lines changed: 3 additions & 1 deletion
@@ -539,6 +539,8 @@ def _addup_repetitive_outputs_(
     var_device = collections.defaultdict(str)

     def _change_order_by_topo_order(var_name):
+        if topo_order_for_backward is None:
+            return
         origin_names = renamed_vars[var_name]
         origin_names.sort(key=lambda x: topo_order_for_grad_name[x])

@@ -1596,12 +1598,12 @@ def find_op_index(block_desc, cur_op_desc):
         program._appending_grad_times
     ]
     # sum parameter's gradients' var given multiple var gradient
-    topo_order = _topo_order_map(block, target_vars)
     if os.environ.get("FLAGS_program_topo_reorder", "False") in [
         'True',
         '1',
         'true',
     ]:
+        topo_order = _topo_order_map(block, target_vars)
         topo_order_for_backward = _topo_bwd_order_map(
             topo_order, get_backward_op_desc
         )
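
For context, a minimal standalone sketch of the control flow these two hunks establish (toy gradient names and a plain dict standing in for Paddle's _topo_order_map / _topo_bwd_order_map, not the actual backward machinery): the topological order is only built when FLAGS_program_topo_reorder is enabled, so _change_order_by_topo_order must tolerate a missing order and leave the gradient accumulation order untouched.

import os

topo_order_for_backward = None
if os.environ.get("FLAGS_program_topo_reorder", "False") in ['True', '1', 'true']:
    # Stand-in for _topo_order_map(block, target_vars) / _topo_bwd_order_map(...):
    # maps each renamed grad var name to its position in topological order.
    topo_order_for_backward = {"x@GRAD@RENAME@1": 0, "x@GRAD@RENAME@0": 1}


def change_order_by_topo_order(names):
    # Mirrors the patched helper: with the flag off no order exists, so return
    # early and keep the original accumulation order.
    if topo_order_for_backward is None:
        return
    names.sort(key=lambda x: topo_order_for_backward[x])


renamed = ["x@GRAD@RENAME@0", "x@GRAD@RENAME@1"]
change_order_by_topo_order(renamed)
print(renamed)  # unchanged unless FLAGS_program_topo_reorder is set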

python/paddle/static/amp/fp16_utils.py

Lines changed: 34 additions & 11 deletions
@@ -262,7 +262,7 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype):
                 _rename_arg(op, in_var.name, out_var.name)

     for attr_name in ['in_dtype', 'out_dtype', 'dtype']:
-        if op.has_attr(attr_name) and is_float_dtype(op.attr(attr_name)):
+        if op.has_attr(attr_name) and op.attr(attr_name) in FLOAT_TYPES:
             op._set_attr(attr_name, dest_dtype)

     return num_cast_ops
@@ -405,13 +405,18 @@ def fp16_guard():
         yield


-def is_float_dtype(dtype):
-    return (
-        dtype == core.VarDesc.VarType.FP32
-        or dtype == core.VarDesc.VarType.FP16
-        or dtype == core.VarDesc.VarType.BF16
-        or dtype == core.VarDesc.VarType.FP64
-    )
+FLOAT_TYPES = {
+    core.VarDesc.VarType.FP32,
+    core.VarDesc.VarType.FP16,
+    core.VarDesc.VarType.BF16,
+    core.VarDesc.VarType.FP64,
+}
+
+SUPPORT_FLOAT_TYPES = {
+    core.VarDesc.VarType.FP32,
+    core.VarDesc.VarType.FP16,
+    core.VarDesc.VarType.BF16,
+}


 def set_var_dst_dtype(
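
As a side note, a small sketch of how the two new sets are meant to be used (assuming an installed paddle where paddle.base.core exposes VarDesc.VarType, as the patched module does): plain set membership replaces the removed is_float_dtype() helper, and SUPPORT_FLOAT_TYPES deliberately leaves FP64 out so fp64 data is not treated as a low-precision candidate.

from paddle.base import core

FLOAT_TYPES = {
    core.VarDesc.VarType.FP32,
    core.VarDesc.VarType.FP16,
    core.VarDesc.VarType.BF16,
    core.VarDesc.VarType.FP64,
}
SUPPORT_FLOAT_TYPES = FLOAT_TYPES - {core.VarDesc.VarType.FP64}

dtype = core.VarDesc.VarType.FP64
print(dtype in FLOAT_TYPES)          # True: fp64 dtype attrs still get rewritten in _insert_cast_op
print(dtype in SUPPORT_FLOAT_TYPES)  # False: fp64 inputs are not candidates for AMP casting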
@@ -433,7 +438,7 @@ def set_var_dst_dtype(
         if var is None or var.type not in _valid_types:
             continue

-        if is_float_dtype(var.dtype):
+        if var.dtype in FLOAT_TYPES:
             low_precison_var_names.add(var_name)
             if need_set_dtype:
                 var.desc.set_dtype(dtype)
@@ -700,6 +705,25 @@ def cast_model_to_fp16(

     def need_process(op):
         need_process = True
+
+        def is_support_type(name):
+            if not op.block._find_var_recursive(
+                name
+            ):  # a special case for lod_tensor_blocking_queue_0
+                return True
+            if (
+                op.block._var_recursive(name).type
+                != core.VarDesc.VarType.LOD_TENSOR
+            ):
+                return False
+            return op.block._var_recursive(name).dtype in SUPPORT_FLOAT_TYPES
+
+        if len(op.input_arg_names) > 0 and all(
+            not is_support_type(name) for name in op.input_arg_names
+        ):
+            return False
+
+        # if input type of op is fp64, we just skip it.
         if op.type in ["set_value"]:
             # NOTE(zoooo0820): OP set_value has attribute "dtype", but its output type is
             # determined by the input.dtype instead of attribute. So, here we still process it.
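
A standalone sketch of the skip rule this hunk adds to need_process(), using toy dicts and string dtypes instead of Paddle's Operator/Block API: an op is left untouched by cast_model_to_fp16 when it has inputs and none of them is a LoD tensor of a supported float dtype (for example, when all inputs are fp64).

SUPPORT_FLOAT_TYPES = {"fp32", "fp16", "bf16"}  # stand-ins for core.VarDesc.VarType members


def is_support_type(var):
    # None plays the role of a var that _find_var_recursive cannot locate
    # (e.g. lod_tensor_blocking_queue_0): treat it as supported.
    if var is None:
        return True
    if var["type"] != "lod_tensor":
        return False
    return var["dtype"] in SUPPORT_FLOAT_TYPES


def need_process(input_vars):
    # New rule: skip the op entirely when it has inputs and none is supported.
    if len(input_vars) > 0 and all(not is_support_type(v) for v in input_vars):
        return False
    return True


fp64_only = [{"type": "lod_tensor", "dtype": "fp64"}]
mixed = fp64_only + [{"type": "lod_tensor", "dtype": "fp32"}]
print(need_process(fp64_only))  # False: the op keeps running in fp64
print(need_process(mixed))      # True: still considered for low-precision casting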
@@ -711,8 +735,7 @@ def need_process(op):
             # output type of some operators such as fill_constant will be determined by the attribute value.
             #
             if not op.has_attr('in_dtype') and (
-                op.has_attr(attr_name)
-                and is_float_dtype(op.attr(attr_name))
+                op.has_attr(attr_name) and op.attr(attr_name) in FLOAT_TYPES
             ):
                 need_process = False

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from dygraph_to_static_utils import (
+    Dy2StTestBase,
+    test_legacy_and_pt,
+)
+
+import paddle
+
+np.random.seed(1)
+
+
+def func(x):
+    y = x[0:3].astype("float32")
+    return y
+
+
+class TestAmp64Case(Dy2StTestBase):
+    def _run_static(self):
+        static_func = paddle.jit.to_static(func)
+        x = paddle.randn((10, 10)).astype("float64")
+        with paddle.amp.auto_cast(True, level="O2"):
+            dy_out = func(x)
+            st_out = static_func(x)
+        np.testing.assert_allclose(dy_out.numpy(), st_out.numpy())
+
+    @test_legacy_and_pt
+    def test_ast_to_func(self):
+        self._run_static()
+
+
+if __name__ == '__main__':
+    unittest.main()
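
For reference, a minimal standalone reproduction of the scenario the new test covers, assuming only an installed paddle (the Dy2StTestBase harness and the test_legacy_and_pt decorator come from the test-suite-local dygraph_to_static_utils module and are omitted here): slice a float64 tensor and cast it to float32 inside an AMP O2 region, then check that the dygraph and to_static results agree.

import numpy as np

import paddle


def func(x):
    return x[0:3].astype("float32")


x = paddle.randn((10, 10)).astype("float64")
with paddle.amp.auto_cast(True, level="O2"):
    dy_out = func(x)                        # eager (dygraph) result
    st_out = paddle.jit.to_static(func)(x)  # converted static-graph result
np.testing.assert_allclose(dy_out.numpy(), st_out.numpy())
print("dygraph and to_static outputs match under AMP O2")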
