From b6a7783bf869eafbdc402aa9e24f6177d44533d6 Mon Sep 17 00:00:00 2001 From: RbRe145 Date: Tue, 23 Sep 2025 13:29:27 +0000 Subject: [PATCH 1/4] fix model's hash and json --- .../google/t5-efficient-base-kv128/graph_hash.txt | 1 + .../google/t5-efficient-base-kv16/graph_hash.txt | 1 + .../google/t5-efficient-base-kv256/graph_hash.txt | 1 + .../google/t5-efficient-base-kv32/graph_hash.txt | 1 + .../google/t5-efficient-base-nh16/graph_hash.txt | 1 + .../google/t5-efficient-base-nh24/graph_hash.txt | 1 + .../google/t5-efficient-base-nh32/graph_hash.txt | 1 + .../google/t5-efficient-base-nh8/graph_hash.txt | 1 + .../google/t5-efficient-base-nl24/graph_hash.txt | 1 + .../google/t5-efficient-base-nl32/graph_hash.txt | 1 + .../google/t5-efficient-base-nl36/graph_hash.txt | 1 + .../google/t5-efficient-base-nl40/graph_hash.txt | 1 + .../google/t5-efficient-base-nl48/graph_hash.txt | 1 + .../google/t5-efficient-large-dl12/graph_hash.txt | 1 + .../google/t5-efficient-large-el2/graph_hash.txt | 1 + .../google/t5-efficient-large-el4/graph_hash.txt | 1 + .../google/t5-efficient-large-el6/graph_hash.txt | 1 + .../google/t5-efficient-large-el8/graph_hash.txt | 1 + .../google/t5-efficient-large-kv128/graph_hash.txt | 1 + .../google/t5-efficient-large-kv16/graph_hash.txt | 1 + .../google/t5-efficient-large-kv256/graph_hash.txt | 1 + .../google/t5-efficient-large-kv32/graph_hash.txt | 1 + .../google/t5-efficient-large-nh2/graph_hash.txt | 1 + .../google/t5-efficient-large-nh24/graph_hash.txt | 1 + .../google/t5-efficient-large-nh32/graph_hash.txt | 1 + .../google/t5-efficient-large-nh4/graph_hash.txt | 1 + .../google/t5-efficient-large-nh8-nl32/graph_hash.txt | 1 + .../google/t5-efficient-large-nh8/graph_hash.txt | 1 + .../google/t5-efficient-large-nl10/graph_hash.txt | 1 + .../google/t5-efficient-large-nl16/graph_hash.txt | 1 + .../google/t5-efficient-large-nl20/graph_hash.txt | 1 + .../google/t5-efficient-large-nl32/graph_hash.txt | 1 + 32 files changed, 32 insertions(+) create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-kv128/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-kv16/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-kv256/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-kv32/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nh16/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nh24/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nh32/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nh8/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nl24/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nl32/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nl36/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nl40/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-base-nl48/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-dl12/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-el2/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-el4/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-el6/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-el8/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-kv128/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-kv16/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-kv256/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-kv32/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nh2/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nh24/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nh32/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nh4/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nh8-nl32/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nh8/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nl10/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nl16/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nl20/graph_hash.txt create mode 100644 samples/transformers-auto-model/google/t5-efficient-large-nl32/graph_hash.txt diff --git a/samples/transformers-auto-model/google/t5-efficient-base-kv128/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-kv128/graph_hash.txt new file mode 100644 index 000000000..cfcd27b7a --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-kv128/graph_hash.txt @@ -0,0 +1 @@ +51e4b7c542183b28cf684e21105bd420f3c15c88c3565ed50d246c3c8f5e5278 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-kv16/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-kv16/graph_hash.txt new file mode 100644 index 000000000..015bbc7aa --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-kv16/graph_hash.txt @@ -0,0 +1 @@ +2c690e763e6a306e0826564467c85af74792139726c4e388a916ca301c84b54f \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-kv256/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-kv256/graph_hash.txt new file mode 100644 index 000000000..08bbd4129 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-kv256/graph_hash.txt @@ -0,0 +1 @@ +ac84bafb06e423018771c4f001432d61aba15f52336c9bb50e40038569759206 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-kv32/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-kv32/graph_hash.txt new file mode 100644 index 000000000..3ca9de559 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-kv32/graph_hash.txt @@ -0,0 +1 @@ +f221f7b0376e5209452b2b70b9f298e33e6cb90433ff3ceaf6283dbc66ab5b23 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nh16/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nh16/graph_hash.txt new file mode 100644 index 000000000..f9db31088 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nh16/graph_hash.txt @@ -0,0 +1 @@ +6d926a6f23b9a9be70782e5b40821fad7062454d17b7a91fb0aa14175357ecf6 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nh24/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nh24/graph_hash.txt new file mode 100644 index 000000000..407374099 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nh24/graph_hash.txt @@ -0,0 +1 @@ +91714bc0491793a7ba200d13327fb318ffa77f210eff3d5f79906fab21ac5ebe \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nh32/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nh32/graph_hash.txt new file mode 100644 index 000000000..2d9bbd0c7 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nh32/graph_hash.txt @@ -0,0 +1 @@ +44cd4571d13a2179afc9079932dafa5b75ceab1dc251a1529bb1b9e95be578b1 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nh8/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nh8/graph_hash.txt new file mode 100644 index 000000000..ab3440384 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nh8/graph_hash.txt @@ -0,0 +1 @@ +0e6cc2de7066ca6742d73e7aa2fd65c18492696bef6d992f4284839c07e3a2e7 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nl24/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nl24/graph_hash.txt new file mode 100644 index 000000000..8a6b5d4d6 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nl24/graph_hash.txt @@ -0,0 +1 @@ +9850b460106189e9acf25fc43ce61d9245a7d6d384edd99faeea1f2d913741ed \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nl32/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nl32/graph_hash.txt new file mode 100644 index 000000000..31e7f9cd8 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nl32/graph_hash.txt @@ -0,0 +1 @@ +c795de753c5c6fcaaa4df739ece68037f8c577f089759c068bbe8e7610bf7e98 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nl36/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nl36/graph_hash.txt new file mode 100644 index 000000000..a91b50894 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nl36/graph_hash.txt @@ -0,0 +1 @@ +e71ae02a715037931ed3ae897878167e8484bbcbb63d56445ddb0e1cc525e4bb \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nl40/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nl40/graph_hash.txt new file mode 100644 index 000000000..04d34bb04 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nl40/graph_hash.txt @@ -0,0 +1 @@ +4aeba0ecc3f62c67dd85542b3bca4ab44bc7f78ab18bab99953394c03d881b03 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-base-nl48/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-base-nl48/graph_hash.txt new file mode 100644 index 000000000..7ce33b90d --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-base-nl48/graph_hash.txt @@ -0,0 +1 @@ +93514c3328a569c3227db5b50d925909e5d934efb8ccdf54324f1a0b0838ba17 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-dl12/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-dl12/graph_hash.txt new file mode 100644 index 000000000..fab3ac478 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-dl12/graph_hash.txt @@ -0,0 +1 @@ +059ab0fa0a3dbbfdbe6a0b800ec63351e36b56c5fc8412709f746954a87ebbf9 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-el2/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-el2/graph_hash.txt new file mode 100644 index 000000000..d24f2f69c --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-el2/graph_hash.txt @@ -0,0 +1 @@ +8cb732beed93c3a710b0eb430de39d9f4ad357846cfe461dc27f178d0e9d232a \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-el4/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-el4/graph_hash.txt new file mode 100644 index 000000000..3733fd158 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-el4/graph_hash.txt @@ -0,0 +1 @@ +1f06bbbd38a25a53a8f038d8b5f1eacc3515ad6212b8b900c22cd2da03d9a698 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-el6/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-el6/graph_hash.txt new file mode 100644 index 000000000..6e9a6915e --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-el6/graph_hash.txt @@ -0,0 +1 @@ +47a7e4cb097c127b69d5a17be2ae2bff7b56b37a8a56dcf60f7c553e9e078d38 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-el8/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-el8/graph_hash.txt new file mode 100644 index 000000000..9207e79e9 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-el8/graph_hash.txt @@ -0,0 +1 @@ +9f35c32ef0a7ee3d3ae5f84a2093ee5e14fc764d062d8943c28de2a5c8a8e4ce \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-kv128/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-kv128/graph_hash.txt new file mode 100644 index 000000000..08668239f --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-kv128/graph_hash.txt @@ -0,0 +1 @@ +2781951f366c05172fe709789e26d1c62272f51b20164df6ea85c3fc7427a82d \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-kv16/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-kv16/graph_hash.txt new file mode 100644 index 000000000..3fecf8159 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-kv16/graph_hash.txt @@ -0,0 +1 @@ +199bf77e61c0101558b5b7780bb46c4c54baab4f69eb15b6b41502fbff14afa3 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-kv256/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-kv256/graph_hash.txt new file mode 100644 index 000000000..164a105a1 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-kv256/graph_hash.txt @@ -0,0 +1 @@ +7d74c207c0968b2e19a31649462a08fafba36650a8ae68da8d2b0bcfba16c6b0 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-kv32/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-kv32/graph_hash.txt new file mode 100644 index 000000000..1bd7e6a6a --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-kv32/graph_hash.txt @@ -0,0 +1 @@ +eff65c086eaa1c4084e7eefb1ae75801e3550e8ede05a95ecb99282e249846ac \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nh2/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nh2/graph_hash.txt new file mode 100644 index 000000000..a6e70010b --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nh2/graph_hash.txt @@ -0,0 +1 @@ +6184647728f37854e2fade80d3698a7bdf63438ef9c14fc5ea3c78324ada14af \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nh24/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nh24/graph_hash.txt new file mode 100644 index 000000000..0a85121f1 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nh24/graph_hash.txt @@ -0,0 +1 @@ +889e789a15be4caf0606d0fb41c544e9fdf3418dff7bf2a4f7c15f70bbc65ad0 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nh32/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nh32/graph_hash.txt new file mode 100644 index 000000000..122a7a64f --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nh32/graph_hash.txt @@ -0,0 +1 @@ +0b955d2404a5a80f0ecd8a8ffb027d7536d5740ec9286f2a0870493643c1d6ac \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nh4/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nh4/graph_hash.txt new file mode 100644 index 000000000..0fdecb31e --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nh4/graph_hash.txt @@ -0,0 +1 @@ +75428df2efbff5477e120014af028467121ce02e9c51672c6881d1e3b83579fb \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nh8-nl32/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nh8-nl32/graph_hash.txt new file mode 100644 index 000000000..f6c83fa65 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nh8-nl32/graph_hash.txt @@ -0,0 +1 @@ +89e7d8480a29d73c7e3045e43aa4dcb8f4ab41b6748ac1fd27de7547d0ac9d0c \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nh8/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nh8/graph_hash.txt new file mode 100644 index 000000000..3e3d7fb69 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nh8/graph_hash.txt @@ -0,0 +1 @@ +03ef8a94dcdb3d506ac7dc06077de6f83df52d9714684027f48a2f9eee00a461 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nl10/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nl10/graph_hash.txt new file mode 100644 index 000000000..57fe2ca4f --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nl10/graph_hash.txt @@ -0,0 +1 @@ +f154eaa8e7631a60a84cf3db2b3fb06aad6091641b8bf0b85452ed4829baed5f \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nl16/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nl16/graph_hash.txt new file mode 100644 index 000000000..e7570e147 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nl16/graph_hash.txt @@ -0,0 +1 @@ +2157295077252c1c0494c30d060983924261304d83e9bef7ae28e9d6caa7c7a1 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nl20/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nl20/graph_hash.txt new file mode 100644 index 000000000..310aa50f3 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nl20/graph_hash.txt @@ -0,0 +1 @@ +84aa982af7c3457829feb08e64577cf39221686103e65189de45c2471df26821 \ No newline at end of file diff --git a/samples/transformers-auto-model/google/t5-efficient-large-nl32/graph_hash.txt b/samples/transformers-auto-model/google/t5-efficient-large-nl32/graph_hash.txt new file mode 100644 index 000000000..23fec69f2 --- /dev/null +++ b/samples/transformers-auto-model/google/t5-efficient-large-nl32/graph_hash.txt @@ -0,0 +1 @@ +c57bbc9f8323de46604b8a6f9db7b00e9637d8b118b5a438a290cacdf0faeb34 \ No newline at end of file From 873fa298f5ea1ec059a7c17f42d3550e93e7beea Mon Sep 17 00:00:00 2001 From: RbRe145 Date: Thu, 25 Sep 2025 07:41:26 +0000 Subject: [PATCH 2/4] add new bart and xlnet models --- graph_net/test/nlp_model_getter.py | 47 + .../PaddleNLP/bart-base/graph_net.json | 6 + .../PaddleNLP/bart-base/input_meta.py | 27 + paddle_samples/PaddleNLP/bart-base/model.py | 3182 +++++++ .../PaddleNLP/bart-base/weight_meta.py | 2847 ++++++ .../chinese-xlnet-base/graph_net.json | 6 + .../chinese-xlnet-base/input_meta.py | 19 + .../PaddleNLP/chinese-xlnet-base/model.py | 4369 +++++++++ .../chinese-xlnet-base/weight_meta.py | 2048 ++++ .../chinese-xlnet-large/graph_net.json | 6 + .../chinese-xlnet-large/input_meta.py | 19 + .../PaddleNLP/chinese-xlnet-large/model.py | 8389 +++++++++++++++++ .../chinese-xlnet-large/weight_meta.py | 4076 ++++++++ .../chinese-xlnet-mid/graph_net.json | 6 + .../PaddleNLP/chinese-xlnet-mid/input_meta.py | 19 + .../PaddleNLP/chinese-xlnet-mid/model.py | 8389 +++++++++++++++++ .../chinese-xlnet-mid/weight_meta.py | 4076 ++++++++ .../PaddleNLP/xlnet-base-cased/graph_net.json | 6 + .../PaddleNLP/xlnet-base-cased/input_meta.py | 42 + .../PaddleNLP/xlnet-base-cased/model.py | 4369 +++++++++ .../PaddleNLP/xlnet-base-cased/weight_meta.py | 2048 ++++ .../xlnet-large-cased/graph_net.json | 6 + .../PaddleNLP/xlnet-large-cased/input_meta.py | 42 + .../PaddleNLP/xlnet-large-cased/model.py | 8389 +++++++++++++++++ .../xlnet-large-cased/weight_meta.py | 4076 ++++++++ 25 files changed, 56509 insertions(+) create mode 100644 paddle_samples/PaddleNLP/bart-base/graph_net.json create mode 100644 paddle_samples/PaddleNLP/bart-base/input_meta.py create mode 100644 paddle_samples/PaddleNLP/bart-base/model.py create mode 100644 paddle_samples/PaddleNLP/bart-base/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-base/graph_net.json create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-base/input_meta.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-base/model.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-base/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-large/graph_net.json create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-large/input_meta.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-large/model.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-large/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-mid/graph_net.json create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-mid/input_meta.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-mid/model.py create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-mid/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/xlnet-base-cased/graph_net.json create mode 100644 paddle_samples/PaddleNLP/xlnet-base-cased/input_meta.py create mode 100644 paddle_samples/PaddleNLP/xlnet-base-cased/model.py create mode 100644 paddle_samples/PaddleNLP/xlnet-base-cased/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/xlnet-large-cased/graph_net.json create mode 100644 paddle_samples/PaddleNLP/xlnet-large-cased/input_meta.py create mode 100644 paddle_samples/PaddleNLP/xlnet-large-cased/model.py create mode 100644 paddle_samples/PaddleNLP/xlnet-large-cased/weight_meta.py diff --git a/graph_net/test/nlp_model_getter.py b/graph_net/test/nlp_model_getter.py index abebfee4d..5ce710b24 100644 --- a/graph_net/test/nlp_model_getter.py +++ b/graph_net/test/nlp_model_getter.py @@ -107,3 +107,50 @@ def get_skep_model_and_inputs(model_name, text, dtype): tokenizer = TokenizerClass.from_pretrained(model_name) inputs = tokenizer(text, return_tensors="pd") return model, inputs + + +def get_bart_model_and_inputs(model_name, text, dtype): + from paddlenlp.transformers import BartModel, BartTokenizer + + model = BartModel.from_pretrained(model_name) + model.eval() + + tokenizer = BartTokenizer.from_pretrained(model_name) + + inputs = tokenizer( + text, + return_tensors="pd", + padding=True, + truncation=True, + max_length=512, + ) + inputs.pop("token_type_ids", None) + + return model, inputs + + +def get_xlnet_model_and_inputs(model_name, text, dtype): + import paddle + from paddlenlp.transformers import XLNetModel, XLNetTokenizer, XLNetConfig + + config = XLNetConfig.from_pretrained(model_name) + model = XLNetModel(config) + if dtype == "float16": + model = model.astype(paddle.float16) + model.eval() + + tokenizer = XLNetTokenizer.from_pretrained(model_name) + + enc = tokenizer( + text, + return_tensors="pd", + padding=True, + truncation=True, + # max_length=512, + ) + if "attention_mask" not in enc: + input_ids = enc["input_ids"] + pad_id = tokenizer.pad_token_id + enc["attention_mask"] = (input_ids != pad_id).astype("int64") + + return model, enc diff --git a/paddle_samples/PaddleNLP/bart-base/graph_net.json b/paddle_samples/PaddleNLP/bart-base/graph_net.json new file mode 100644 index 000000000..25c5098dc --- /dev/null +++ b/paddle_samples/PaddleNLP/bart-base/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "bart-base", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/bart-base/input_meta.py b/paddle_samples/PaddleNLP/bart-base/input_meta.py new file mode 100644 index 000000000..91995ce2c --- /dev/null +++ b/paddle_samples/PaddleNLP/bart-base/input_meta.py @@ -0,0 +1,27 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 21] + dtype = "int64" + data = [ + 0, + 31414, + 6, + 127, + 766, + 16, + 3045, + 4, + 38, + 524, + 2239, + 59, + 739, + 2777, + 3092, + 8, + 49, + 41885, + 4, + 1437, + 2, + ] diff --git a/paddle_samples/PaddleNLP/bart-base/model.py b/paddle_samples/PaddleNLP/bart-base/model.py new file mode 100644 index 000000000..50c980186 --- /dev/null +++ b/paddle_samples/PaddleNLP/bart-base/model.py @@ -0,0 +1,3182 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + data_0, + ): + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x21xi64) <- (1x21xi64, 1xf32) + full_like_0 = paddle._C_ops.full_like( + data_0, full_0, paddle.int64, paddle.framework._current_expected_place() + ) + del full_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.slice: (1x20xi64) <- (1x21xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + data_0, [1], full_int_array_0, full_int_array_1, [1], [] + ) + + # pd_op.assign: (1x20xi64) <- (1x20xi64) + assign_0 = slice_0 + del slice_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [1] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [2147483647] + + # pd_op.set_value_with_tensor_: (1x21xi64) <- (1x21xi64, 1x20xi64, 1xi64, 1xi64, 1xi64) + set_value_with_tensor__0 = paddle._C_ops.set_value_with_tensor_( + full_like_0, + assign_0, + full_int_array_2, + full_int_array_3, + full_int_array_2, + [1], + [], + [], + ) + del assign_0, full_like_0 + + # pd_op.set_value_: (1x21xi64) <- (1x21xi64, 1xi64, 1xi64, 1xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value_with_tensor__0, + full_int_array_0, + full_int_array_2, + full_int_array_2, + [1], + [1], + [], + [1], + [float("2")], + ) + del full_int_array_0, full_int_array_2, set_value_with_tensor__0 + + # pd_op.full: (xi64) <- () + full_1 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x21xb) <- (1x21xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_1) + del full_1 + + # pd_op.cast: (1x21xf32) <- (1x21xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x21xf32) <- (1x21xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(cast_0, full_int_array_4) + del cast_0, full_int_array_4 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_0 = paddle._C_ops.scale(unsqueeze_0, full_2, float("0"), True) + del full_2, unsqueeze_0 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [-1, 21] + + # pd_op.reshape: (1x21xi64) <- (1x21xi64, 2xi64) + reshape_0 = paddle._C_ops.reshape(data_0, full_int_array_5) + del data_0 + + # pd_op.embedding: (1x21x768xf32) <- (1x21xi64, 50265x768xf32) + embedding_0 = paddle._C_ops.embedding(reshape_0, parameter_258, -1, False) + del reshape_0 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x768xf32) <- (1x21x768xf32, 1xf32) + scale_1 = paddle._C_ops.scale(embedding_0, full_3, float("0"), True) + del embedding_0 + + # pd_op.full: (1xf64) <- () + full_4 = paddle._C_ops.full( + [1], float("0"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_5 = paddle._C_ops.full( + [1], float("21"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_6 = paddle._C_ops.full( + [1], float("1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (21xi64) <- (1xf64, 1xf64, 1xf64) + arange_0 = paddle.arange(full_4, full_5, full_6, dtype="int64") + del full_4, full_5, full_6 + + # pd_op.scale: (21xi64) <- (21xi64, 1xf32) + scale_2 = paddle._C_ops.scale(arange_0, full_3, float("2"), True) + del arange_0 + + # pd_op.embedding: (21x768xf32) <- (21xi64, 1026x768xf32) + embedding_1 = paddle._C_ops.embedding(scale_2, parameter_257, -1, False) + del parameter_257 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 21x768xf32) + add_0 = paddle._C_ops.add(scale_1, embedding_1) + del embedding_1, scale_1 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_1, layer_norm_2, layer_norm_3 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_0, parameter_256, parameter_255, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_0, parameter_255, parameter_256 + + # pd_op.full: (1xf32) <- () + full_7 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_1, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_1 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_254, False, False) + del parameter_254 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_253) + del matmul_0, parameter_253 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_6 = [0, 0, 12, 64] + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_1, full_int_array_6) + del add_1 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_252, False, False) + del parameter_252 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_1, parameter_251) + del matmul_1, parameter_251 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_250, False, False) + del parameter_250 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_2, parameter_249) + del matmul_2, parameter_249 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_2, full_int_array_6) + del add_2 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(add_3, full_int_array_6) + del add_3 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_3, [0, 2, 1, 3]) + del reshape_3 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_3 = paddle._C_ops.scale(transpose_0, full_8, float("0"), True) + del transpose_0 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_3 = paddle._C_ops.matmul(scale_3, transpose_1, False, True) + del scale_3, transpose_1 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_4 = paddle._C_ops.add(matmul_3, scale_0) + del matmul_3 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_0 = paddle._C_ops.softmax(add_4, -1) + del add_4 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_4 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_4, [0, 2, 1, 3]) + del matmul_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_7 = [0, 0, 768] + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_4 = paddle._C_ops.reshape(transpose_3, full_int_array_7) + del transpose_3 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_5 = paddle._C_ops.matmul(reshape_4, parameter_248, False, False) + del parameter_248, reshape_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_5 = paddle._C_ops.add(matmul_5, parameter_247) + del matmul_5, parameter_247 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_5, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_5 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_6 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_4, layer_norm_5, layer_norm_6 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_242, parameter_241, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_6, parameter_241, parameter_242 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_4, parameter_246, False, False) + del parameter_246 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_245) + del matmul_6, parameter_245 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_0 = paddle._C_ops.gelu(add_7, False) + del add_7 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_0, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_0 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_7 = paddle._C_ops.matmul(dropout_6, parameter_244, False, False) + del dropout_6, parameter_244 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_8 = paddle._C_ops.add(matmul_7, parameter_243) + del matmul_7, parameter_243 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_8, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_9 = paddle._C_ops.add(layer_norm_4, dropout_8) + del dropout_8, layer_norm_4 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_7, layer_norm_8, layer_norm_9 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_9, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_9, parameter_239, parameter_240 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_7, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_237) + del matmul_8, parameter_237 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_10, full_int_array_6) + del add_10 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_7, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_11 = paddle._C_ops.add(matmul_9, parameter_235) + del matmul_9, parameter_235 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_7, parameter_234, False, False) + del parameter_234 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_10, parameter_233) + del matmul_10, parameter_233 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_11, full_int_array_6) + del add_11 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(add_12, full_int_array_6) + del add_12 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_7, [0, 2, 1, 3]) + del reshape_7 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_4, full_8, float("0"), True) + del transpose_4 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_11 = paddle._C_ops.matmul(scale_4, transpose_5, False, True) + del scale_4, transpose_5 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_13 = paddle._C_ops.add(matmul_11, scale_0) + del matmul_11 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_1 = paddle._C_ops.softmax(add_13, -1) + del add_13 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_12 = paddle._C_ops.matmul(dropout_10, transpose_6, False, False) + del dropout_10, transpose_6 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_12, [0, 2, 1, 3]) + del matmul_12 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(transpose_7, full_int_array_7) + del transpose_7 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_13 = paddle._C_ops.matmul(reshape_8, parameter_232, False, False) + del parameter_232, reshape_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_13, parameter_231) + del matmul_13, parameter_231 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_14, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_14 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_15 = paddle._C_ops.add(layer_norm_7, dropout_12) + del dropout_12, layer_norm_7 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_10, layer_norm_11, layer_norm_12 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_15, parameter_226, parameter_225, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_15, parameter_225, parameter_226 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_10, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_229) + del matmul_14, parameter_229 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_1 = paddle._C_ops.gelu(add_16, False) + del add_16 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_1, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_1 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_15 = paddle._C_ops.matmul(dropout_14, parameter_228, False, False) + del dropout_14, parameter_228 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_15, parameter_227) + del matmul_15, parameter_227 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_17, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_17 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_18 = paddle._C_ops.add(layer_norm_10, dropout_16) + del dropout_16, layer_norm_10 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_13, layer_norm_14, layer_norm_15 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_224, parameter_223, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18, parameter_223, parameter_224 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_13, parameter_222, False, False) + del parameter_222 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_221) + del matmul_16, parameter_221 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_19, full_int_array_6) + del add_19 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_13, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_20 = paddle._C_ops.add(matmul_17, parameter_219) + del matmul_17, parameter_219 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_13, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_18, parameter_217) + del matmul_18, parameter_217 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_20, full_int_array_6) + del add_20 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(add_21, full_int_array_6) + del add_21 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_11, [0, 2, 1, 3]) + del reshape_11 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_5 = paddle._C_ops.scale(transpose_8, full_8, float("0"), True) + del transpose_8 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_19 = paddle._C_ops.matmul(scale_5, transpose_9, False, True) + del scale_5, transpose_9 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_22 = paddle._C_ops.add(matmul_19, scale_0) + del matmul_19 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_2 = paddle._C_ops.softmax(add_22, -1) + del add_22 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_20 = paddle._C_ops.matmul(dropout_18, transpose_10, False, False) + del dropout_18, transpose_10 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_20, [0, 2, 1, 3]) + del matmul_20 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_12 = paddle._C_ops.reshape(transpose_11, full_int_array_7) + del transpose_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_21 = paddle._C_ops.matmul(reshape_12, parameter_216, False, False) + del parameter_216, reshape_12 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_23 = paddle._C_ops.add(matmul_21, parameter_215) + del matmul_21, parameter_215 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_23, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_23 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_24 = paddle._C_ops.add(layer_norm_13, dropout_20) + del dropout_20, layer_norm_13 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_16, layer_norm_17, layer_norm_18 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_24, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_24, parameter_209, parameter_210 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_16, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_213) + del matmul_22, parameter_213 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_2 = paddle._C_ops.gelu(add_25, False) + del add_25 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_2, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_2 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_23 = paddle._C_ops.matmul(dropout_22, parameter_212, False, False) + del dropout_22, parameter_212 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_23, parameter_211) + del matmul_23, parameter_211 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_26, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_26 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_27 = paddle._C_ops.add(layer_norm_16, dropout_24) + del dropout_24, layer_norm_16 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_19, layer_norm_20, layer_norm_21 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_27, parameter_208, parameter_207, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_27, parameter_207, parameter_208 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_19, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_205) + del matmul_24, parameter_205 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_28, full_int_array_6) + del add_28 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_19, parameter_204, False, False) + del parameter_204 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_29 = paddle._C_ops.add(matmul_25, parameter_203) + del matmul_25, parameter_203 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_19, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_26, parameter_201) + del matmul_26, parameter_201 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_29, full_int_array_6) + del add_29 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_30, full_int_array_6) + del add_30 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_15, [0, 2, 1, 3]) + del reshape_15 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_6 = paddle._C_ops.scale(transpose_12, full_8, float("0"), True) + del transpose_12 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_27 = paddle._C_ops.matmul(scale_6, transpose_13, False, True) + del scale_6, transpose_13 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_31 = paddle._C_ops.add(matmul_27, scale_0) + del matmul_27 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_3 = paddle._C_ops.softmax(add_31, -1) + del add_31 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_28 = paddle._C_ops.matmul(dropout_26, transpose_14, False, False) + del dropout_26, transpose_14 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_16 = paddle._C_ops.reshape(transpose_15, full_int_array_7) + del transpose_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_16, parameter_200, False, False) + del parameter_200, reshape_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_32 = paddle._C_ops.add(matmul_29, parameter_199) + del matmul_29, parameter_199 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_32, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_32 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_33 = paddle._C_ops.add(layer_norm_19, dropout_28) + del dropout_28, layer_norm_19 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_22, layer_norm_23, layer_norm_24 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_33, parameter_194, parameter_193, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_33, parameter_193, parameter_194 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_22, parameter_198, False, False) + del parameter_198 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_197) + del matmul_30, parameter_197 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_3 = paddle._C_ops.gelu(add_34, False) + del add_34 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_3, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_3 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(dropout_30, parameter_196, False, False) + del dropout_30, parameter_196 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_35 = paddle._C_ops.add(matmul_31, parameter_195) + del matmul_31, parameter_195 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_35, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_35 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_36 = paddle._C_ops.add(layer_norm_22, dropout_32) + del dropout_32, layer_norm_22 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_25, layer_norm_26, layer_norm_27 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_36, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_36, parameter_191, parameter_192 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_25, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_189) + del matmul_32, parameter_189 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_37, full_int_array_6) + del add_37 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_25, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_38 = paddle._C_ops.add(matmul_33, parameter_187) + del matmul_33, parameter_187 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_25, parameter_186, False, False) + del parameter_186 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_39 = paddle._C_ops.add(matmul_34, parameter_185) + del matmul_34, parameter_185 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_38, full_int_array_6) + del add_38 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_19 = paddle._C_ops.reshape(add_39, full_int_array_6) + del add_39 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_19, [0, 2, 1, 3]) + del reshape_19 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_7 = paddle._C_ops.scale(transpose_16, full_8, float("0"), True) + del transpose_16 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_35 = paddle._C_ops.matmul(scale_7, transpose_17, False, True) + del scale_7, transpose_17 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_40 = paddle._C_ops.add(matmul_35, scale_0) + del matmul_35 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_4 = paddle._C_ops.softmax(add_40, -1) + del add_40 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_36 = paddle._C_ops.matmul(dropout_34, transpose_18, False, False) + del dropout_34, transpose_18 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_36, [0, 2, 1, 3]) + del matmul_36 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_20 = paddle._C_ops.reshape(transpose_19, full_int_array_7) + del transpose_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_37 = paddle._C_ops.matmul(reshape_20, parameter_184, False, False) + del parameter_184, reshape_20 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_37, parameter_183) + del matmul_37, parameter_183 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_41, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_41 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_42 = paddle._C_ops.add(layer_norm_25, dropout_36) + del dropout_36, layer_norm_25 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_28, layer_norm_29, layer_norm_30 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_178, parameter_177, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_42, parameter_177, parameter_178 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_28, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_43 = paddle._C_ops.add(matmul_38, parameter_181) + del matmul_38, parameter_181 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_43, False) + del add_43 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_4, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_4 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_39 = paddle._C_ops.matmul(dropout_38, parameter_180, False, False) + del dropout_38, parameter_180 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_39, parameter_179) + del matmul_39, parameter_179 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_44, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_44 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_45 = paddle._C_ops.add(layer_norm_28, dropout_40) + del dropout_40, layer_norm_28 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_31, layer_norm_32, layer_norm_33 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_176, parameter_175, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_45, parameter_175, parameter_176 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_31, parameter_174, False, False) + del parameter_174 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_46 = paddle._C_ops.add(matmul_40, parameter_173) + del matmul_40, parameter_173 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_46, full_int_array_6) + del add_46 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_31, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_41, parameter_171) + del matmul_41, parameter_171 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_31, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_48 = paddle._C_ops.add(matmul_42, parameter_169) + del matmul_42, parameter_169 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_47, full_int_array_6) + del add_47 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_23 = paddle._C_ops.reshape(add_48, full_int_array_6) + del add_48 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_23, [0, 2, 1, 3]) + del reshape_23 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_8 = paddle._C_ops.scale(transpose_20, full_8, float("0"), True) + del transpose_20 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_21, False, True) + del scale_8, transpose_21 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_49 = paddle._C_ops.add(matmul_43, scale_0) + del matmul_43 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_5 = paddle._C_ops.softmax(add_49, -1) + del add_49 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_44 = paddle._C_ops.matmul(dropout_42, transpose_22, False, False) + del dropout_42, transpose_22 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_44, [0, 2, 1, 3]) + del matmul_44 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_23, full_int_array_7) + del transpose_23 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_45 = paddle._C_ops.matmul(reshape_24, parameter_168, False, False) + del parameter_168, reshape_24 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_50 = paddle._C_ops.add(matmul_45, parameter_167) + del matmul_45, parameter_167 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_50, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_50 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_51 = paddle._C_ops.add(layer_norm_31, dropout_44) + del dropout_44, layer_norm_31 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_34, layer_norm_35, layer_norm_36 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_51, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_51, parameter_161, parameter_162 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_34, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_52 = paddle._C_ops.add(matmul_46, parameter_165) + del matmul_46, parameter_165 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_52, False) + del add_52 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_5, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_5 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_47 = paddle._C_ops.matmul(dropout_46, parameter_164, False, False) + del dropout_46, parameter_164 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_53 = paddle._C_ops.add(matmul_47, parameter_163) + del matmul_47, parameter_163 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_53, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_53 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_54 = paddle._C_ops.add(layer_norm_34, dropout_48) + del dropout_48, layer_norm_34 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_37, layer_norm_38, layer_norm_39 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_54, parameter_160, parameter_159, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_54, parameter_159, parameter_160 + + # pd_op.slice: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + scale_0, [2], full_int_array_1, full_int_array_3, [1], [] + ) + del full_int_array_1, full_int_array_3, scale_0 + + # pd_op.reshape: (1x21xi64) <- (1x21xi64, 2xi64) + reshape_25 = paddle._C_ops.reshape(set_value__0, full_int_array_5) + del full_int_array_5, set_value__0 + + # pd_op.full: (21x21xf32) <- () + full_9 = paddle._C_ops.full( + [21, 21], + float("-inf"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.triu: (21x21xf32) <- (21x21xf32) + triu_0 = paddle._C_ops.triu(full_9, 1) + del full_9 + + # pd_op.embedding: (1x21x768xf32) <- (1x21xi64, 50265x768xf32) + embedding_2 = paddle._C_ops.embedding(reshape_25, parameter_258, -1, False) + del parameter_258, reshape_25 + + # pd_op.scale: (1x21x768xf32) <- (1x21x768xf32, 1xf32) + scale_9 = paddle._C_ops.scale(embedding_2, full_3, float("0"), True) + del embedding_2, full_3 + + # pd_op.embedding: (21x768xf32) <- (21xi64, 1026x768xf32) + embedding_3 = paddle._C_ops.embedding(scale_2, parameter_158, -1, False) + del parameter_158, scale_2 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 21x768xf32) + add_55 = paddle._C_ops.add(scale_9, embedding_3) + del embedding_3, scale_9 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_40, layer_norm_41, layer_norm_42 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_55, parameter_157, parameter_156, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_55, parameter_156, parameter_157 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_40, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_40 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_48 = paddle._C_ops.matmul(dropout_50, parameter_155, False, False) + del parameter_155 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_56 = paddle._C_ops.add(matmul_48, parameter_154) + del matmul_48, parameter_154 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_56, full_int_array_6) + del add_56 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(dropout_50, parameter_153, False, False) + del parameter_153 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_152) + del matmul_49, parameter_152 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(dropout_50, parameter_151, False, False) + del parameter_151 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_150) + del matmul_50, parameter_150 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_27 = paddle._C_ops.reshape(add_57, full_int_array_6) + del add_57 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_27, [0, 2, 1, 3]) + del reshape_27 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_58, full_int_array_6) + del add_58 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_10 = paddle._C_ops.scale(transpose_24, full_8, float("0"), True) + del transpose_24 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_51 = paddle._C_ops.matmul(scale_10, transpose_25, False, True) + del scale_10, transpose_25 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 21x21xf32) + add_59 = paddle._C_ops.add(matmul_51, triu_0) + del matmul_51 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_6 = paddle._C_ops.softmax(add_59, -1) + del add_59 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_52 = paddle._C_ops.matmul(dropout_52, transpose_26, False, False) + del dropout_52, transpose_26 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_29 = paddle._C_ops.reshape(transpose_27, full_int_array_7) + del transpose_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_53 = paddle._C_ops.matmul(reshape_29, parameter_149, False, False) + del parameter_149, reshape_29 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_60 = paddle._C_ops.add(matmul_53, parameter_148) + del matmul_53, parameter_148 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_60, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_60 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_61 = paddle._C_ops.add(dropout_50, dropout_54) + del dropout_50, dropout_54 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_43, layer_norm_44, layer_norm_45 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_135, parameter_134, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_61, parameter_134, parameter_135 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_43, parameter_147, False, False) + del parameter_147 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_62 = paddle._C_ops.add(matmul_54, parameter_146) + del matmul_54, parameter_146 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_62, full_int_array_6) + del add_62 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_37, parameter_145, False, False) + del parameter_145 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_144) + del matmul_55, parameter_144 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_37, parameter_143, False, False) + del parameter_143 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_56, parameter_142) + del matmul_56, parameter_142 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(add_63, full_int_array_6) + del add_63 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_31, [0, 2, 1, 3]) + del reshape_31 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_64, full_int_array_6) + del add_64 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_11 = paddle._C_ops.scale(transpose_28, full_8, float("0"), True) + del transpose_28 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_57 = paddle._C_ops.matmul(scale_11, transpose_29, False, True) + del scale_11, transpose_29 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_65 = paddle._C_ops.add(matmul_57, slice_1) + del matmul_57 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_7 = paddle._C_ops.softmax(add_65, -1) + del add_65 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_58 = paddle._C_ops.matmul(dropout_56, transpose_30, False, False) + del dropout_56, transpose_30 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_58, [0, 2, 1, 3]) + del matmul_58 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_31, full_int_array_7) + del transpose_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(reshape_33, parameter_141, False, False) + del parameter_141, reshape_33 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_59, parameter_140) + del matmul_59, parameter_140 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_66, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_66 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_67 = paddle._C_ops.add(layer_norm_43, dropout_58) + del dropout_58, layer_norm_43 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_46, layer_norm_47, layer_norm_48 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_133, parameter_132, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_67, parameter_132, parameter_133 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_46, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_68 = paddle._C_ops.add(matmul_60, parameter_138) + del matmul_60, parameter_138 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_68, False) + del add_68 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_6, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_6 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_61 = paddle._C_ops.matmul(dropout_60, parameter_137, False, False) + del dropout_60, parameter_137 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_69 = paddle._C_ops.add(matmul_61, parameter_136) + del matmul_61, parameter_136 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_69, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_69 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_70 = paddle._C_ops.add(layer_norm_46, dropout_62) + del dropout_62, layer_norm_46 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_49, layer_norm_50, layer_norm_51 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_131, parameter_130, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_70, parameter_130, parameter_131 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_49, parameter_129, False, False) + del parameter_129 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_71 = paddle._C_ops.add(matmul_62, parameter_128) + del matmul_62, parameter_128 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_71, full_int_array_6) + del add_71 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_63 = paddle._C_ops.matmul(layer_norm_49, parameter_127, False, False) + del parameter_127 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_126) + del matmul_63, parameter_126 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_49, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_73 = paddle._C_ops.add(matmul_64, parameter_124) + del matmul_64, parameter_124 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(add_72, full_int_array_6) + del add_72 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_35, [0, 2, 1, 3]) + del reshape_35 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_73, full_int_array_6) + del add_73 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_12 = paddle._C_ops.scale(transpose_32, full_8, float("0"), True) + del transpose_32 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_65 = paddle._C_ops.matmul(scale_12, transpose_33, False, True) + del scale_12, transpose_33 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 21x21xf32) + add_74 = paddle._C_ops.add(matmul_65, triu_0) + del matmul_65 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_8 = paddle._C_ops.softmax(add_74, -1) + del add_74 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_66 = paddle._C_ops.matmul(dropout_64, transpose_34, False, False) + del dropout_64, transpose_34 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_66, [0, 2, 1, 3]) + del matmul_66 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_37 = paddle._C_ops.reshape(transpose_35, full_int_array_7) + del transpose_35 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(reshape_37, parameter_123, False, False) + del parameter_123, reshape_37 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_67, parameter_122) + del matmul_67, parameter_122 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_75, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_75 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_76 = paddle._C_ops.add(layer_norm_49, dropout_66) + del dropout_66, layer_norm_49 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_52, layer_norm_53, layer_norm_54 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_109, parameter_108, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_76, parameter_108, parameter_109 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_68 = paddle._C_ops.matmul(layer_norm_52, parameter_121, False, False) + del parameter_121 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_77 = paddle._C_ops.add(matmul_68, parameter_120) + del matmul_68, parameter_120 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_77, full_int_array_6) + del add_77 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(layer_norm_37, parameter_119, False, False) + del parameter_119 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_78 = paddle._C_ops.add(matmul_69, parameter_118) + del matmul_69, parameter_118 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_37, parameter_117, False, False) + del parameter_117 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_79 = paddle._C_ops.add(matmul_70, parameter_116) + del matmul_70, parameter_116 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(add_78, full_int_array_6) + del add_78 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_39, [0, 2, 1, 3]) + del reshape_39 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_79, full_int_array_6) + del add_79 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_13 = paddle._C_ops.scale(transpose_36, full_8, float("0"), True) + del transpose_36 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_71 = paddle._C_ops.matmul(scale_13, transpose_37, False, True) + del scale_13, transpose_37 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_80 = paddle._C_ops.add(matmul_71, slice_1) + del matmul_71 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_9 = paddle._C_ops.softmax(add_80, -1) + del add_80 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_72 = paddle._C_ops.matmul(dropout_68, transpose_38, False, False) + del dropout_68, transpose_38 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_72, [0, 2, 1, 3]) + del matmul_72 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(transpose_39, full_int_array_7) + del transpose_39 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(reshape_41, parameter_115, False, False) + del parameter_115, reshape_41 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_81 = paddle._C_ops.add(matmul_73, parameter_114) + del matmul_73, parameter_114 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_81, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_81 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_82 = paddle._C_ops.add(layer_norm_52, dropout_70) + del dropout_70, layer_norm_52 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_55, layer_norm_56, layer_norm_57 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_82, parameter_107, parameter_106, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_82, parameter_106, parameter_107 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_55, parameter_113, False, False) + del parameter_113 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_83 = paddle._C_ops.add(matmul_74, parameter_112) + del matmul_74, parameter_112 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_83, False) + del add_83 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_7, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_7 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_75 = paddle._C_ops.matmul(dropout_72, parameter_111, False, False) + del dropout_72, parameter_111 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_75, parameter_110) + del matmul_75, parameter_110 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_84, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_84 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_85 = paddle._C_ops.add(layer_norm_55, dropout_74) + del dropout_74, layer_norm_55 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_58, layer_norm_59, layer_norm_60 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_85, parameter_105, parameter_104, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_85, parameter_104, parameter_105 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_76 = paddle._C_ops.matmul(layer_norm_58, parameter_103, False, False) + del parameter_103 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_76, parameter_102) + del matmul_76, parameter_102 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_86, full_int_array_6) + del add_86 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_77 = paddle._C_ops.matmul(layer_norm_58, parameter_101, False, False) + del parameter_101 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_77, parameter_100) + del matmul_77, parameter_100 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_58, parameter_99, False, False) + del parameter_99 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_88 = paddle._C_ops.add(matmul_78, parameter_98) + del matmul_78, parameter_98 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(add_87, full_int_array_6) + del add_87 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_43, [0, 2, 1, 3]) + del reshape_43 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_88, full_int_array_6) + del add_88 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_14 = paddle._C_ops.scale(transpose_40, full_8, float("0"), True) + del transpose_40 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_79 = paddle._C_ops.matmul(scale_14, transpose_41, False, True) + del scale_14, transpose_41 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 21x21xf32) + add_89 = paddle._C_ops.add(matmul_79, triu_0) + del matmul_79 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_10 = paddle._C_ops.softmax(add_89, -1) + del add_89 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_80 = paddle._C_ops.matmul(dropout_76, transpose_42, False, False) + del dropout_76, transpose_42 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_80, [0, 2, 1, 3]) + del matmul_80 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_45 = paddle._C_ops.reshape(transpose_43, full_int_array_7) + del transpose_43 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(reshape_45, parameter_97, False, False) + del parameter_97, reshape_45 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_90 = paddle._C_ops.add(matmul_81, parameter_96) + del matmul_81, parameter_96 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_90, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_90 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_91 = paddle._C_ops.add(layer_norm_58, dropout_78) + del dropout_78, layer_norm_58 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_61, layer_norm_62, layer_norm_63 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_83, parameter_82, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_91, parameter_82, parameter_83 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_61, parameter_95, False, False) + del parameter_95 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_92 = paddle._C_ops.add(matmul_82, parameter_94) + del matmul_82, parameter_94 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_92, full_int_array_6) + del add_92 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_37, parameter_93, False, False) + del parameter_93 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_83, parameter_92) + del matmul_83, parameter_92 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_37, parameter_91, False, False) + del parameter_91 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_84, parameter_90) + del matmul_84, parameter_90 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(add_93, full_int_array_6) + del add_93 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_47, [0, 2, 1, 3]) + del reshape_47 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(add_94, full_int_array_6) + del add_94 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_48, [0, 2, 1, 3]) + del reshape_48 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_15 = paddle._C_ops.scale(transpose_44, full_8, float("0"), True) + del transpose_44 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_85 = paddle._C_ops.matmul(scale_15, transpose_45, False, True) + del scale_15, transpose_45 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_95 = paddle._C_ops.add(matmul_85, slice_1) + del matmul_85 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_11 = paddle._C_ops.softmax(add_95, -1) + del add_95 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_86 = paddle._C_ops.matmul(dropout_80, transpose_46, False, False) + del dropout_80, transpose_46 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_86, [0, 2, 1, 3]) + del matmul_86 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(transpose_47, full_int_array_7) + del transpose_47 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_87 = paddle._C_ops.matmul(reshape_49, parameter_89, False, False) + del parameter_89, reshape_49 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_96 = paddle._C_ops.add(matmul_87, parameter_88) + del matmul_87, parameter_88 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_96, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_96 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_97 = paddle._C_ops.add(layer_norm_61, dropout_82) + del dropout_82, layer_norm_61 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_64, layer_norm_65, layer_norm_66 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_97, parameter_81, parameter_80, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_97, parameter_80, parameter_81 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_88 = paddle._C_ops.matmul(layer_norm_64, parameter_87, False, False) + del parameter_87 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_98 = paddle._C_ops.add(matmul_88, parameter_86) + del matmul_88, parameter_86 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_98, False) + del add_98 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_8, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_8 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_89 = paddle._C_ops.matmul(dropout_84, parameter_85, False, False) + del dropout_84, parameter_85 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_99 = paddle._C_ops.add(matmul_89, parameter_84) + del matmul_89, parameter_84 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_99, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_99 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_100 = paddle._C_ops.add(layer_norm_64, dropout_86) + del dropout_86, layer_norm_64 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_67, layer_norm_68, layer_norm_69 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_100, parameter_79, parameter_78, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_100, parameter_78, parameter_79 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_67, parameter_77, False, False) + del parameter_77 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_101 = paddle._C_ops.add(matmul_90, parameter_76) + del matmul_90, parameter_76 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(add_101, full_int_array_6) + del add_101 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_48 = paddle._C_ops.transpose(reshape_50, [0, 2, 1, 3]) + del reshape_50 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_67, parameter_75, False, False) + del parameter_75 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_91, parameter_74) + del matmul_91, parameter_74 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_92 = paddle._C_ops.matmul(layer_norm_67, parameter_73, False, False) + del parameter_73 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_92, parameter_72) + del matmul_92, parameter_72 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_51 = paddle._C_ops.reshape(add_102, full_int_array_6) + del add_102 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_49 = paddle._C_ops.transpose(reshape_51, [0, 2, 1, 3]) + del reshape_51 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(add_103, full_int_array_6) + del add_103 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_50 = paddle._C_ops.transpose(reshape_52, [0, 2, 1, 3]) + del reshape_52 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_16 = paddle._C_ops.scale(transpose_48, full_8, float("0"), True) + del transpose_48 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_93 = paddle._C_ops.matmul(scale_16, transpose_49, False, True) + del scale_16, transpose_49 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 21x21xf32) + add_104 = paddle._C_ops.add(matmul_93, triu_0) + del matmul_93 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_12 = paddle._C_ops.softmax(add_104, -1) + del add_104 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_12, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_12 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_94 = paddle._C_ops.matmul(dropout_88, transpose_50, False, False) + del dropout_88, transpose_50 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_51 = paddle._C_ops.transpose(matmul_94, [0, 2, 1, 3]) + del matmul_94 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_53 = paddle._C_ops.reshape(transpose_51, full_int_array_7) + del transpose_51 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_95 = paddle._C_ops.matmul(reshape_53, parameter_71, False, False) + del parameter_71, reshape_53 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_95, parameter_70) + del matmul_95, parameter_70 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_105, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_105 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_106 = paddle._C_ops.add(layer_norm_67, dropout_90) + del dropout_90, layer_norm_67 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_70, layer_norm_71, layer_norm_72 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_57, parameter_56, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_106, parameter_56, parameter_57 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_70, parameter_69, False, False) + del parameter_69 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_107 = paddle._C_ops.add(matmul_96, parameter_68) + del matmul_96, parameter_68 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(add_107, full_int_array_6) + del add_107 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_52 = paddle._C_ops.transpose(reshape_54, [0, 2, 1, 3]) + del reshape_54 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(layer_norm_37, parameter_67, False, False) + del parameter_67 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_108 = paddle._C_ops.add(matmul_97, parameter_66) + del matmul_97, parameter_66 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_98 = paddle._C_ops.matmul(layer_norm_37, parameter_65, False, False) + del parameter_65 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_109 = paddle._C_ops.add(matmul_98, parameter_64) + del matmul_98, parameter_64 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_55 = paddle._C_ops.reshape(add_108, full_int_array_6) + del add_108 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_53 = paddle._C_ops.transpose(reshape_55, [0, 2, 1, 3]) + del reshape_55 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(add_109, full_int_array_6) + del add_109 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_54 = paddle._C_ops.transpose(reshape_56, [0, 2, 1, 3]) + del reshape_56 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_17 = paddle._C_ops.scale(transpose_52, full_8, float("0"), True) + del transpose_52 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_99 = paddle._C_ops.matmul(scale_17, transpose_53, False, True) + del scale_17, transpose_53 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_110 = paddle._C_ops.add(matmul_99, slice_1) + del matmul_99 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_13 = paddle._C_ops.softmax(add_110, -1) + del add_110 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_13, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_13 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_100 = paddle._C_ops.matmul(dropout_92, transpose_54, False, False) + del dropout_92, transpose_54 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_55 = paddle._C_ops.transpose(matmul_100, [0, 2, 1, 3]) + del matmul_100 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_57 = paddle._C_ops.reshape(transpose_55, full_int_array_7) + del transpose_55 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_101 = paddle._C_ops.matmul(reshape_57, parameter_63, False, False) + del parameter_63, reshape_57 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_101, parameter_62) + del matmul_101, parameter_62 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_111, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_111 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_112 = paddle._C_ops.add(layer_norm_70, dropout_94) + del dropout_94, layer_norm_70 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_73, layer_norm_74, layer_norm_75 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_55, parameter_54, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_112, parameter_54, parameter_55 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_73, parameter_61, False, False) + del parameter_61 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_113 = paddle._C_ops.add(matmul_102, parameter_60) + del matmul_102, parameter_60 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_113, False) + del add_113 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_9, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_9 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_103 = paddle._C_ops.matmul(dropout_96, parameter_59, False, False) + del dropout_96, parameter_59 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_114 = paddle._C_ops.add(matmul_103, parameter_58) + del matmul_103, parameter_58 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_114, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_114 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_115 = paddle._C_ops.add(layer_norm_73, dropout_98) + del dropout_98, layer_norm_73 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_76, layer_norm_77, layer_norm_78 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_53, parameter_52, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_115, parameter_52, parameter_53 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_104 = paddle._C_ops.matmul(layer_norm_76, parameter_51, False, False) + del parameter_51 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_116 = paddle._C_ops.add(matmul_104, parameter_50) + del matmul_104, parameter_50 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(add_116, full_int_array_6) + del add_116 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_56 = paddle._C_ops.transpose(reshape_58, [0, 2, 1, 3]) + del reshape_58 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_76, parameter_49, False, False) + del parameter_49 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_117 = paddle._C_ops.add(matmul_105, parameter_48) + del matmul_105, parameter_48 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_106 = paddle._C_ops.matmul(layer_norm_76, parameter_47, False, False) + del parameter_47 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_118 = paddle._C_ops.add(matmul_106, parameter_46) + del matmul_106, parameter_46 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_59 = paddle._C_ops.reshape(add_117, full_int_array_6) + del add_117 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_57 = paddle._C_ops.transpose(reshape_59, [0, 2, 1, 3]) + del reshape_59 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(add_118, full_int_array_6) + del add_118 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_58 = paddle._C_ops.transpose(reshape_60, [0, 2, 1, 3]) + del reshape_60 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_18 = paddle._C_ops.scale(transpose_56, full_8, float("0"), True) + del transpose_56 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_107 = paddle._C_ops.matmul(scale_18, transpose_57, False, True) + del scale_18, transpose_57 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 21x21xf32) + add_119 = paddle._C_ops.add(matmul_107, triu_0) + del matmul_107 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_14 = paddle._C_ops.softmax(add_119, -1) + del add_119 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_14, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_14 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_108 = paddle._C_ops.matmul(dropout_100, transpose_58, False, False) + del dropout_100, transpose_58 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_59 = paddle._C_ops.transpose(matmul_108, [0, 2, 1, 3]) + del matmul_108 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_61 = paddle._C_ops.reshape(transpose_59, full_int_array_7) + del transpose_59 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_109 = paddle._C_ops.matmul(reshape_61, parameter_45, False, False) + del parameter_45, reshape_61 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_120 = paddle._C_ops.add(matmul_109, parameter_44) + del matmul_109, parameter_44 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_102, dropout_103 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_120, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_120 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_121 = paddle._C_ops.add(layer_norm_76, dropout_102) + del dropout_102, layer_norm_76 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_79, layer_norm_80, layer_norm_81 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_121, parameter_31, parameter_30, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_121, parameter_30, parameter_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_79, parameter_43, False, False) + del parameter_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_122 = paddle._C_ops.add(matmul_110, parameter_42) + del matmul_110, parameter_42 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(add_122, full_int_array_6) + del add_122 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_60 = paddle._C_ops.transpose(reshape_62, [0, 2, 1, 3]) + del reshape_62 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_111 = paddle._C_ops.matmul(layer_norm_37, parameter_41, False, False) + del parameter_41 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_123 = paddle._C_ops.add(matmul_111, parameter_40) + del matmul_111, parameter_40 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_37, parameter_39, False, False) + del parameter_39 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_124 = paddle._C_ops.add(matmul_112, parameter_38) + del matmul_112, parameter_38 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(add_123, full_int_array_6) + del add_123 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_61 = paddle._C_ops.transpose(reshape_63, [0, 2, 1, 3]) + del reshape_63 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(add_124, full_int_array_6) + del add_124 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_62 = paddle._C_ops.transpose(reshape_64, [0, 2, 1, 3]) + del reshape_64 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_19 = paddle._C_ops.scale(transpose_60, full_8, float("0"), True) + del transpose_60 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_113 = paddle._C_ops.matmul(scale_19, transpose_61, False, True) + del scale_19, transpose_61 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_125 = paddle._C_ops.add(matmul_113, slice_1) + del matmul_113 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_15 = paddle._C_ops.softmax(add_125, -1) + del add_125 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_104, dropout_105 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_15, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_15 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_114 = paddle._C_ops.matmul(dropout_104, transpose_62, False, False) + del dropout_104, transpose_62 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_63 = paddle._C_ops.transpose(matmul_114, [0, 2, 1, 3]) + del matmul_114 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_65 = paddle._C_ops.reshape(transpose_63, full_int_array_7) + del transpose_63 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_115 = paddle._C_ops.matmul(reshape_65, parameter_37, False, False) + del parameter_37, reshape_65 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_126 = paddle._C_ops.add(matmul_115, parameter_36) + del matmul_115, parameter_36 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_106, dropout_107 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_126, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_126 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_127 = paddle._C_ops.add(layer_norm_79, dropout_106) + del dropout_106, layer_norm_79 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_82, layer_norm_83, layer_norm_84 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_127, parameter_29, parameter_28, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_127, parameter_28, parameter_29 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_82, parameter_35, False, False) + del parameter_35 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_128 = paddle._C_ops.add(matmul_116, parameter_34) + del matmul_116, parameter_34 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_128, False) + del add_128 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_108, dropout_109 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_10, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_10 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_117 = paddle._C_ops.matmul(dropout_108, parameter_33, False, False) + del dropout_108, parameter_33 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_129 = paddle._C_ops.add(matmul_117, parameter_32) + del matmul_117, parameter_32 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_110, dropout_111 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_129, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_129 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_130 = paddle._C_ops.add(layer_norm_82, dropout_110) + del dropout_110, layer_norm_82 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_85, layer_norm_86, layer_norm_87 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_130, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_130, parameter_26, parameter_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_118 = paddle._C_ops.matmul(layer_norm_85, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_131 = paddle._C_ops.add(matmul_118, parameter_24) + del matmul_118, parameter_24 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(add_131, full_int_array_6) + del add_131 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_64 = paddle._C_ops.transpose(reshape_66, [0, 2, 1, 3]) + del reshape_66 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_119 = paddle._C_ops.matmul(layer_norm_85, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_132 = paddle._C_ops.add(matmul_119, parameter_22) + del matmul_119, parameter_22 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_85, parameter_21, False, False) + del parameter_21 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_133 = paddle._C_ops.add(matmul_120, parameter_20) + del matmul_120, parameter_20 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(add_132, full_int_array_6) + del add_132 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_65 = paddle._C_ops.transpose(reshape_67, [0, 2, 1, 3]) + del reshape_67 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(add_133, full_int_array_6) + del add_133 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_66 = paddle._C_ops.transpose(reshape_68, [0, 2, 1, 3]) + del reshape_68 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_20 = paddle._C_ops.scale(transpose_64, full_8, float("0"), True) + del transpose_64 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_121 = paddle._C_ops.matmul(scale_20, transpose_65, False, True) + del scale_20, transpose_65 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 21x21xf32) + add_134 = paddle._C_ops.add(matmul_121, triu_0) + del matmul_121, triu_0 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_16 = paddle._C_ops.softmax(add_134, -1) + del add_134 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_112, dropout_113 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_16, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_16 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_122 = paddle._C_ops.matmul(dropout_112, transpose_66, False, False) + del dropout_112, transpose_66 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_67 = paddle._C_ops.transpose(matmul_122, [0, 2, 1, 3]) + del matmul_122 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_67, full_int_array_7) + del transpose_67 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_123 = paddle._C_ops.matmul(reshape_69, parameter_19, False, False) + del parameter_19, reshape_69 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_135 = paddle._C_ops.add(matmul_123, parameter_18) + del matmul_123, parameter_18 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_114, dropout_115 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_135, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_135 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_136 = paddle._C_ops.add(layer_norm_85, dropout_114) + del dropout_114, layer_norm_85 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_88, layer_norm_89, layer_norm_90 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_136, parameter_5, parameter_4, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_136, parameter_4, parameter_5 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_124 = paddle._C_ops.matmul(layer_norm_88, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_137 = paddle._C_ops.add(matmul_124, parameter_16) + del matmul_124, parameter_16 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_137, full_int_array_6) + del add_137 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_68 = paddle._C_ops.transpose(reshape_70, [0, 2, 1, 3]) + del reshape_70 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_125 = paddle._C_ops.matmul(layer_norm_37, parameter_15, False, False) + del parameter_15 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_138 = paddle._C_ops.add(matmul_125, parameter_14) + del matmul_125, parameter_14 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_37, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_139 = paddle._C_ops.add(matmul_126, parameter_12) + del matmul_126, parameter_12 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(add_138, full_int_array_6) + del add_138 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_69 = paddle._C_ops.transpose(reshape_71, [0, 2, 1, 3]) + del reshape_71 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_72 = paddle._C_ops.reshape(add_139, full_int_array_6) + del add_139, full_int_array_6 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_70 = paddle._C_ops.transpose(reshape_72, [0, 2, 1, 3]) + del reshape_72 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_21 = paddle._C_ops.scale(transpose_68, full_8, float("0"), True) + del full_8, transpose_68 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_127 = paddle._C_ops.matmul(scale_21, transpose_69, False, True) + del scale_21, transpose_69 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_140 = paddle._C_ops.add(matmul_127, slice_1) + del matmul_127, slice_1 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_17 = paddle._C_ops.softmax(add_140, -1) + del add_140 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_116, dropout_117 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_17, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_17 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_128 = paddle._C_ops.matmul(dropout_116, transpose_70, False, False) + del dropout_116, transpose_70 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_71 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_73 = paddle._C_ops.reshape(transpose_71, full_int_array_7) + del full_int_array_7, transpose_71 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_129 = paddle._C_ops.matmul(reshape_73, parameter_11, False, False) + del parameter_11, reshape_73 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_141 = paddle._C_ops.add(matmul_129, parameter_10) + del matmul_129, parameter_10 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_118, dropout_119 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_141, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_141 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_142 = paddle._C_ops.add(layer_norm_88, dropout_118) + del dropout_118, layer_norm_88 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_91, layer_norm_92, layer_norm_93 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_142, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_142, parameter_2, parameter_3 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_130 = paddle._C_ops.matmul(layer_norm_91, parameter_9, False, False) + del parameter_9 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_143 = paddle._C_ops.add(matmul_130, parameter_8) + del matmul_130, parameter_8 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_143, False) + del add_143 + + # pd_op.dropout: (1x21x3072xf32, 1x21x3072xui8) <- (1x21x3072xf32, None, 1xf32) + dropout_120, dropout_121 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_11, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_131 = paddle._C_ops.matmul(dropout_120, parameter_7, False, False) + del dropout_120, parameter_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_144 = paddle._C_ops.add(matmul_131, parameter_6) + del matmul_131, parameter_6 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_122, dropout_123 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_144, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_144, full_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_145 = paddle._C_ops.add(layer_norm_91, dropout_122) + del dropout_122, layer_norm_91 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_0, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_145, parameter_1, parameter_0, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_145, layer_norm_37, parameter_0, parameter_1 + + return layer_norm_0 diff --git a/paddle_samples/PaddleNLP/bart-base/weight_meta.py b/paddle_samples/PaddleNLP/bart-base/weight_meta.py new file mode 100644 index 000000000..da97709f4 --- /dev/null +++ b/paddle_samples/PaddleNLP/bart-base/weight_meta.py @@ -0,0 +1,2847 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [768] + dtype = "float32" + min_val = float("-1.04492") + max_val = float("0.306152") + mean = float("0.055763") + std = float("0.089925") + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + min_val = float("0.173218") + max_val = float("3.4375") + mean = float("1.94378") + std = float("0.223736") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + min_val = float("-3.43164") + max_val = float("0.798828") + mean = float("-0.0740031") + std = float("0.183041") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + min_val = float("0.0898438") + max_val = float("1.05859") + mean = float("0.448075") + std = float("0.0791667") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + min_val = float("-1.96484") + max_val = float("0.452637") + mean = float("-0.0887964") + std = float("0.179041") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [768] + dtype = "float32" + min_val = float("0.100952") + max_val = float("1.38281") + mean = float("0.700254") + std = float("0.0784551") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768] + dtype = "float32" + min_val = float("-0.169556") + max_val = float("0.132935") + mean = float("0.000291553") + std = float("0.0443588") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [3072, 768] + dtype = "float32" + min_val = float("-3.50391") + max_val = float("2.57422") + mean = float("-6.89082e-06") + std = float("0.0345879") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [3072] + dtype = "float32" + min_val = float("-0.286377") + max_val = float("0.204346") + mean = float("-0.0645512") + std = float("0.0490256") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.619629") + max_val = float("1.25098") + mean = float("0.00965715") + std = float("0.0439153") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + min_val = float("-0.250488") + max_val = float("1.37598") + mean = float("-0.00115733") + std = float("0.095266") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.835938") + max_val = float("0.911621") + mean = float("-3.88315e-05") + std = float("0.0716457") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768] + dtype = "float32" + min_val = float("-0.251221") + max_val = float("0.283203") + mean = float("0.000441133") + std = float("0.0337921") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.452393") + max_val = float("0.563477") + mean = float("3.47693e-05") + std = float("0.0701192") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + min_val = float("-3.61328") + max_val = float("4.75781") + mean = float("0.00439294") + std = float("0.663468") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.865723") + max_val = float("0.789551") + mean = float("8.22635e-05") + std = float("0.0927952") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + min_val = float("-0.488037") + max_val = float("0.389404") + mean = float("0.000932506") + std = float("0.163855") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.7334") + max_val = float("1.85742") + mean = float("-0.000255722") + std = float("0.0943624") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + min_val = float("-0.235352") + max_val = float("0.364746") + mean = float("0.00010426") + std = float("0.0253047") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.807617") + max_val = float("0.620605") + mean = float("1.63106e-05") + std = float("0.041461") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + min_val = float("-0.0592346") + max_val = float("0.0612488") + mean = float("-0.000747097") + std = float("0.0133801") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.300781") + max_val = float("0.322266") + mean = float("0.000227124") + std = float("0.0476506") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + min_val = float("-1.29199") + max_val = float("1.19531") + mean = float("-0.0081555") + std = float("0.132014") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.55371") + max_val = float("1.74512") + mean = float("0.000290464") + std = float("0.0809273") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [768] + dtype = "float32" + min_val = float("-0.81543") + max_val = float("0.980469") + mean = float("0.0296052") + std = float("0.33146") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.41016") + max_val = float("1.4834") + mean = float("-0.00106662") + std = float("0.0814963") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + min_val = float("-0.525391") + max_val = float("1.01855") + mean = float("-0.0194161") + std = float("0.0492517") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768] + dtype = "float32" + min_val = float("0.162476") + max_val = float("1.24023") + mean = float("0.450819") + std = float("0.039689") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [768] + dtype = "float32" + min_val = float("-0.584961") + max_val = float("0.831543") + mean = float("-0.0801088") + std = float("0.0702904") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768] + dtype = "float32" + min_val = float("0.300537") + max_val = float("4.84766") + mean = float("0.43358") + std = float("0.1703") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [768] + dtype = "float32" + min_val = float("-1.84668") + max_val = float("0.795898") + mean = float("-0.0622566") + std = float("0.11909") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768] + dtype = "float32" + min_val = float("0.430176") + max_val = float("1.08105") + mean = float("0.856139") + std = float("0.0427192") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768] + dtype = "float32" + min_val = float("-0.389648") + max_val = float("0.293213") + mean = float("0.00130731") + std = float("0.0739026") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [3072, 768] + dtype = "float32" + min_val = float("-7.99219") + max_val = float("1.37988") + mean = float("1.10711e-05") + std = float("0.0398258") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [3072] + dtype = "float32" + min_val = float("-0.512695") + max_val = float("0.184204") + mean = float("-0.101517") + std = float("0.0660059") + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.644043") + max_val = float("0.643555") + mean = float("0.0102842") + std = float("0.0506794") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + min_val = float("-0.354248") + max_val = float("0.405029") + mean = float("-0.000719839") + std = float("0.107218") + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.670898") + max_val = float("0.637207") + mean = float("1.43374e-05") + std = float("0.0605907") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768] + dtype = "float32" + min_val = float("-0.30127") + max_val = float("0.42627") + mean = float("0.000952943") + std = float("0.0434266") + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.387207") + max_val = float("0.429688") + mean = float("0.000127731") + std = float("0.061711") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768] + dtype = "float32" + min_val = float("-0.253174") + max_val = float("0.419189") + mean = float("-0.00177001") + std = float("0.0497663") + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.776855") + max_val = float("0.799316") + mean = float("7.70503e-05") + std = float("0.0864494") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + min_val = float("-0.891113") + max_val = float("0.683594") + mean = float("-0.00213796") + std = float("0.261913") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.750977") + max_val = float("0.873047") + mean = float("6.84442e-05") + std = float("0.0878441") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [768] + dtype = "float32" + min_val = float("-0.168823") + max_val = float("0.632324") + mean = float("0.00101271") + std = float("0.0418257") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.821289") + max_val = float("0.790527") + mean = float("3.19495e-05") + std = float("0.0396491") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768] + dtype = "float32" + min_val = float("-0.182373") + max_val = float("0.14624") + mean = float("-0.000428564") + std = float("0.0211172") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.29126") + max_val = float("0.411133") + mean = float("8.66595e-05") + std = float("0.044702") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + min_val = float("-0.190063") + max_val = float("0.234985") + mean = float("0.000707203") + std = float("0.0223429") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.29395") + max_val = float("1.32422") + mean = float("0.00019809") + std = float("0.0789618") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768] + dtype = "float32" + min_val = float("-0.854492") + max_val = float("0.936523") + mean = float("0.0127257") + std = float("0.330065") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.15039") + max_val = float("1.05664") + mean = float("-0.000430712") + std = float("0.0801342") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768] + dtype = "float32" + min_val = float("-0.574219") + max_val = float("0.175659") + mean = float("-0.0174997") + std = float("0.0296027") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768] + dtype = "float32" + min_val = float("0.113892") + max_val = float("0.970215") + mean = float("0.45621") + std = float("0.0352021") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768] + dtype = "float32" + min_val = float("-0.939453") + max_val = float("1.45996") + mean = float("-0.0318848") + std = float("0.102574") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + min_val = float("0.342529") + max_val = float("2.05273") + mean = float("0.4438") + std = float("0.0814034") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [768] + dtype = "float32" + min_val = float("-0.831055") + max_val = float("1.01172") + mean = float("0.0763767") + std = float("0.0859585") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768] + dtype = "float32" + min_val = float("0.376465") + max_val = float("1.48242") + mean = float("0.867288") + std = float("0.0510089") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768] + dtype = "float32" + min_val = float("-0.449463") + max_val = float("0.487061") + mean = float("0.000971075") + std = float("0.0768242") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [3072, 768] + dtype = "float32" + min_val = float("-8.75") + max_val = float("1.29297") + mean = float("1.43174e-05") + std = float("0.0403077") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [3072] + dtype = "float32" + min_val = float("-0.50293") + max_val = float("0.518066") + mean = float("-0.117669") + std = float("0.089743") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.59082") + max_val = float("0.566895") + mean = float("0.00265853") + std = float("0.0538356") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768] + dtype = "float32" + min_val = float("-0.241455") + max_val = float("0.182861") + mean = float("-0.00041845") + std = float("0.0623274") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.678223") + max_val = float("0.672852") + mean = float("8.92212e-06") + std = float("0.059449") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768] + dtype = "float32" + min_val = float("-0.218506") + max_val = float("0.34082") + mean = float("0.00411712") + std = float("0.0441978") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.334961") + max_val = float("0.41748") + mean = float("0.000128222") + std = float("0.061607") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + min_val = float("-0.226685") + max_val = float("0.141357") + mean = float("-8.03176e-05") + std = float("0.0257067") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.789551") + max_val = float("0.868164") + mean = float("0.000165547") + std = float("0.0860855") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768] + dtype = "float32" + min_val = float("-1.04688") + max_val = float("1.00195") + mean = float("0.00284015") + std = float("0.212825") + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.649902") + max_val = float("0.692871") + mean = float("0.000140688") + std = float("0.0874408") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768] + dtype = "float32" + min_val = float("-0.123291") + max_val = float("0.224243") + mean = float("0.00126714") + std = float("0.0280615") + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.560059") + max_val = float("0.460693") + mean = float("2.28893e-06") + std = float("0.0395396") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + min_val = float("-0.0745239") + max_val = float("0.12915") + mean = float("0.00015724") + std = float("0.0152254") + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.318604") + max_val = float("0.417236") + mean = float("-6.24724e-05") + std = float("0.0436322") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768] + dtype = "float32" + min_val = float("-0.0912476") + max_val = float("0.0578613") + mean = float("2.14512e-05") + std = float("0.011152") + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.28223") + max_val = float("1.32227") + mean = float("3.04413e-05") + std = float("0.0783237") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768] + dtype = "float32" + min_val = float("-0.82666") + max_val = float("0.908203") + mean = float("-0.0128437") + std = float("0.341964") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.18848") + max_val = float("1.08301") + mean = float("0.00056576") + std = float("0.0795572") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + min_val = float("-0.217529") + max_val = float("0.217651") + mean = float("-0.0218131") + std = float("0.0205795") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768] + dtype = "float32" + min_val = float("0.0614319") + max_val = float("0.955078") + mean = float("0.449719") + std = float("0.0347093") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768] + dtype = "float32" + min_val = float("-2.22266") + max_val = float("0.947754") + mean = float("0.0312634") + std = float("0.114534") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768] + dtype = "float32" + min_val = float("0.351562") + max_val = float("4.48438") + mean = float("0.447064") + std = float("0.158699") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768] + dtype = "float32" + min_val = float("-1.71289") + max_val = float("0.557617") + mean = float("-0.0879682") + std = float("0.0934652") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + min_val = float("0.454346") + max_val = float("1.08105") + mean = float("0.860268") + std = float("0.0483674") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + min_val = float("-0.310059") + max_val = float("0.560059") + mean = float("0.000284155") + std = float("0.0833974") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [3072, 768] + dtype = "float32" + min_val = float("-8.57031") + max_val = float("1.39453") + mean = float("-1.11405e-05") + std = float("0.0429414") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [3072] + dtype = "float32" + min_val = float("-0.414551") + max_val = float("0.718262") + mean = float("-0.108521") + std = float("0.0752428") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.921387") + max_val = float("0.639648") + mean = float("-0.00312582") + std = float("0.0542425") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768] + dtype = "float32" + min_val = float("-0.215576") + max_val = float("0.171021") + mean = float("-0.000124251") + std = float("0.0563701") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.643066") + max_val = float("0.593262") + mean = float("-1.83982e-05") + std = float("0.066252") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + min_val = float("-0.279297") + max_val = float("0.187866") + mean = float("-0.000428466") + std = float("0.0345215") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.373535") + max_val = float("0.445312") + mean = float("-6.60571e-05") + std = float("0.064403") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [768] + dtype = "float32" + min_val = float("-0.0603638") + max_val = float("0.0473633") + mean = float("-2.00152e-05") + std = float("0.00939217") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.24902") + max_val = float("1.01172") + mean = float("-6.80772e-05") + std = float("0.085726") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768] + dtype = "float32" + min_val = float("-0.70166") + max_val = float("0.717773") + mean = float("0.00868722") + std = float("0.185542") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.970703") + max_val = float("1.11133") + mean = float("-0.000605337") + std = float("0.0885201") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + min_val = float("-0.169678") + max_val = float("0.265625") + mean = float("0.00446949") + std = float("0.0439148") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.785645") + max_val = float("0.54834") + mean = float("-6.01225e-06") + std = float("0.0431069") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768] + dtype = "float32" + min_val = float("-0.130371") + max_val = float("0.0531311") + mean = float("-0.000277936") + std = float("0.0113482") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.432861") + max_val = float("0.347412") + mean = float("1.49241e-05") + std = float("0.0471165") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768] + dtype = "float32" + min_val = float("-0.0888672") + max_val = float("0.0707397") + mean = float("-0.000208571") + std = float("0.00922685") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.27832") + max_val = float("1.38672") + mean = float("-0.000194419") + std = float("0.0777348") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + min_val = float("-0.838379") + max_val = float("0.794922") + mean = float("-0.012945") + std = float("0.297579") + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.19336") + max_val = float("1.12012") + mean = float("0.000514542") + std = float("0.0786299") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [768] + dtype = "float32" + min_val = float("-0.445801") + max_val = float("0.427002") + mean = float("-0.0174128") + std = float("0.0391896") + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768] + dtype = "float32" + min_val = float("0.0701904") + max_val = float("0.846191") + mean = float("0.453216") + std = float("0.0378213") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768] + dtype = "float32" + min_val = float("-1.66406") + max_val = float("1.25586") + mean = float("0.00152094") + std = float("0.152635") + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + min_val = float("0.35498") + max_val = float("2.83594") + mean = float("0.422616") + std = float("0.113766") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + min_val = float("-1.19141") + max_val = float("0.778809") + mean = float("-0.0969552") + std = float("0.102802") + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + min_val = float("0.325195") + max_val = float("1.28223") + mean = float("0.789448") + std = float("0.0633351") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768] + dtype = "float32" + min_val = float("-0.505371") + max_val = float("0.302734") + mean = float("-2.42236e-05") + std = float("0.079225") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [3072, 768] + dtype = "float32" + min_val = float("-16.0") + max_val = float("1.5293") + mean = float("3.81097e-06") + std = float("0.047523") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [3072] + dtype = "float32" + min_val = float("-0.657715") + max_val = float("0.853027") + mean = float("-0.113052") + std = float("0.0792442") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768, 3072] + dtype = "float32" + min_val = float("-4.28516") + max_val = float("3.55273") + mean = float("-0.000331231") + std = float("0.0573905") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + min_val = float("-0.30957") + max_val = float("0.254395") + mean = float("0.00151452") + std = float("0.079716") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.581055") + max_val = float("0.697266") + mean = float("-8.59857e-05") + std = float("0.0701496") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [768] + dtype = "float32" + min_val = float("-0.207397") + max_val = float("0.177246") + mean = float("-0.000163083") + std = float("0.0344208") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.362793") + max_val = float("0.40918") + mean = float("1.47647e-05") + std = float("0.0686297") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768] + dtype = "float32" + min_val = float("-0.0366516") + max_val = float("0.0196533") + mean = float("-0.000163542") + std = float("0.00484102") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.05859") + max_val = float("0.830566") + mean = float("-1.70015e-05") + std = float("0.0835551") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + min_val = float("-0.691895") + max_val = float("0.602539") + mean = float("-0.000167921") + std = float("0.187074") + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.73291") + max_val = float("0.792969") + mean = float("-0.000171674") + std = float("0.0862973") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768] + dtype = "float32" + min_val = float("-0.322266") + max_val = float("0.326172") + mean = float("0.00665764") + std = float("0.0539895") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.719727") + max_val = float("0.59668") + mean = float("8.39694e-06") + std = float("0.0400239") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768] + dtype = "float32" + min_val = float("-0.232178") + max_val = float("0.144531") + mean = float("-0.000777754") + std = float("0.0177002") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.311035") + max_val = float("0.535645") + mean = float("3.55577e-05") + std = float("0.0412915") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + min_val = float("-0.04953") + max_val = float("0.0541992") + mean = float("7.13267e-05") + std = float("0.00657817") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.42676") + max_val = float("1.26953") + mean = float("-5.46152e-07") + std = float("0.076587") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [768] + dtype = "float32" + min_val = float("-1.16016") + max_val = float("1.12598") + mean = float("-0.0047377") + std = float("0.312352") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.17578") + max_val = float("1.56641") + mean = float("0.000141877") + std = float("0.0771282") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768] + dtype = "float32" + min_val = float("-1.08496") + max_val = float("0.491943") + mean = float("-0.0137443") + std = float("0.0612447") + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + min_val = float("0.0552979") + max_val = float("0.859375") + mean = float("0.448837") + std = float("0.0475385") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + min_val = float("-1.79102") + max_val = float("2.39453") + mean = float("0.0118306") + std = float("0.15963") + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768] + dtype = "float32" + min_val = float("0.221802") + max_val = float("1.71582") + mean = float("0.386176") + std = float("0.0745859") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768] + dtype = "float32" + min_val = float("-0.683594") + max_val = float("1.88184") + mean = float("-0.118065") + std = float("0.116923") + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [768] + dtype = "float32" + min_val = float("0.0745239") + max_val = float("1.16699") + mean = float("0.82075") + std = float("0.0724713") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768] + dtype = "float32" + min_val = float("-0.502441") + max_val = float("0.364258") + mean = float("0.00420779") + std = float("0.0872173") + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [3072, 768] + dtype = "float32" + min_val = float("-8.89844") + max_val = float("1.70898") + mean = float("-5.21274e-05") + std = float("0.0544477") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [3072] + dtype = "float32" + min_val = float("-0.597168") + max_val = float("0.663574") + mean = float("-0.099702") + std = float("0.0814544") + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768, 3072] + dtype = "float32" + min_val = float("-2.30469") + max_val = float("1.85645") + mean = float("-0.000716272") + std = float("0.0636168") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [768] + dtype = "float32" + min_val = float("-0.357178") + max_val = float("0.34375") + mean = float("0.00170967") + std = float("0.118222") + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.945312") + max_val = float("0.866699") + mean = float("-1.87612e-05") + std = float("0.0678677") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768] + dtype = "float32" + min_val = float("-0.371826") + max_val = float("0.228516") + mean = float("-0.00193949") + std = float("0.0475839") + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.336426") + max_val = float("0.433105") + mean = float("-0.000198563") + std = float("0.066908") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + min_val = float("-0.0270386") + max_val = float("0.0551453") + mean = float("0.000303509") + std = float("0.00573231") + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.637207") + max_val = float("0.703125") + mean = float("-8.79024e-05") + std = float("0.085253") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768] + dtype = "float32" + min_val = float("-0.702637") + max_val = float("0.77002") + mean = float("-0.00542469") + std = float("0.166002") + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.941406") + max_val = float("1.03711") + mean = float("0.000273284") + std = float("0.0895451") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768] + dtype = "float32" + min_val = float("-0.610352") + max_val = float("0.316406") + mean = float("0.000763194") + std = float("0.08586") + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.824707") + max_val = float("0.943848") + mean = float("7.09477e-06") + std = float("0.037178") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768] + dtype = "float32" + min_val = float("-0.5625") + max_val = float("0.742188") + mean = float("-0.00283873") + std = float("0.0791875") + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.390137") + max_val = float("0.364502") + mean = float("-1.03331e-05") + std = float("0.0367269") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [768] + dtype = "float32" + min_val = float("-0.0423279") + max_val = float("0.0409546") + mean = float("-0.000207591") + std = float("0.00617958") + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.24707") + max_val = float("1.1123") + mean = float("-2.14991e-05") + std = float("0.0831736") + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768] + dtype = "float32" + min_val = float("-1.91016") + max_val = float("1.57617") + mean = float("-0.0154683") + std = float("0.578655") + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.47754") + max_val = float("1.46191") + mean = float("-5.53774e-06") + std = float("0.0802971") + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + min_val = float("-0.535645") + max_val = float("0.442627") + mean = float("0.00182839") + std = float("0.0813837") + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + min_val = float("0.0585022") + max_val = float("0.912109") + mean = float("0.544659") + std = float("0.084057") + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [1026, 768] + dtype = "float32" + min_val = float("-3.52539") + max_val = float("2.60938") + mean = float("1.24352e-05") + std = float("0.0444859") + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [768] + dtype = "float32" + min_val = float("-1.36328") + max_val = float("0.548828") + mean = float("0.00836064") + std = float("0.0607747") + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768] + dtype = "float32" + min_val = float("0.081665") + max_val = float("0.330566") + mean = float("0.285268") + std = float("0.0176816") + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768] + dtype = "float32" + min_val = float("-0.663086") + max_val = float("1.60449") + mean = float("0.0474303") + std = float("0.0966976") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + min_val = float("0.245239") + max_val = float("3.61523") + mean = float("0.525189") + std = float("0.151526") + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + min_val = float("-0.190796") + max_val = float("0.19165") + mean = float("-0.000218492") + std = float("0.0559028") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [3072, 768] + dtype = "float32" + min_val = float("-2.40625") + max_val = float("2.63672") + mean = float("-9.17343e-06") + std = float("0.0367557") + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [3072] + dtype = "float32" + min_val = float("-0.673828") + max_val = float("0.380859") + mean = float("-0.0986715") + std = float("0.0815347") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.740234") + max_val = float("0.485596") + mean = float("-0.00427577") + std = float("0.0506977") + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768] + dtype = "float32" + min_val = float("-0.127808") + max_val = float("0.125244") + mean = float("0.000134931") + std = float("0.0280722") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.450928") + max_val = float("0.445801") + mean = float("-6.7444e-06") + std = float("0.0402753") + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768] + dtype = "float32" + min_val = float("-0.219238") + max_val = float("0.112427") + mean = float("-0.000542198") + std = float("0.0201758") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.287354") + max_val = float("0.304932") + mean = float("-9.6642e-05") + std = float("0.0408038") + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [768] + dtype = "float32" + min_val = float("-0.0557556") + max_val = float("0.0474243") + mean = float("-0.000290657") + std = float("0.00681903") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.864746") + max_val = float("0.875") + mean = float("2.86067e-05") + std = float("0.0779768") + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768] + dtype = "float32" + min_val = float("-0.892578") + max_val = float("0.794922") + mean = float("-0.00337976") + std = float("0.29479") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.821289") + max_val = float("0.724609") + mean = float("0.000132867") + std = float("0.0780431") + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + min_val = float("-0.53125") + max_val = float("0.651367") + mean = float("0.0206089") + std = float("0.0447142") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [768] + dtype = "float32" + min_val = float("0.216431") + max_val = float("0.509277") + mean = float("0.438104") + std = float("0.0412302") + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [768] + dtype = "float32" + min_val = float("-1.02734") + max_val = float("1.24902") + mean = float("0.0191815") + std = float("0.0949325") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768] + dtype = "float32" + min_val = float("0.249268") + max_val = float("2.80664") + mean = float("0.506467") + std = float("0.117859") + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + min_val = float("-0.20105") + max_val = float("0.202759") + mean = float("0.00112513") + std = float("0.0543952") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [3072, 768] + dtype = "float32" + min_val = float("-1.33887") + max_val = float("3.57812") + mean = float("-4.80607e-06") + std = float("0.0419992") + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [3072] + dtype = "float32" + min_val = float("-0.450439") + max_val = float("0.274658") + mean = float("-0.100086") + std = float("0.0798469") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.700195") + max_val = float("0.496094") + mean = float("-0.00125378") + std = float("0.0545837") + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [768] + dtype = "float32" + min_val = float("-0.126099") + max_val = float("0.134155") + mean = float("-0.000686947") + std = float("0.0293917") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.363525") + max_val = float("0.42627") + mean = float("-4.60709e-05") + std = float("0.049824") + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768] + dtype = "float32" + min_val = float("-0.140259") + max_val = float("0.101807") + mean = float("0.00126621") + std = float("0.0208813") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.348633") + max_val = float("0.354736") + mean = float("0.000206098") + std = float("0.0512433") + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + min_val = float("-0.0294342") + max_val = float("0.0189362") + mean = float("-5.09798e-05") + std = float("0.00220086") + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.785645") + max_val = float("0.836914") + mean = float("-0.000125414") + std = float("0.075942") + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [768] + dtype = "float32" + min_val = float("-1.08594") + max_val = float("0.89209") + mean = float("0.00592435") + std = float("0.315497") + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.728516") + max_val = float("0.709961") + mean = float("0.000360004") + std = float("0.076351") + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + min_val = float("-0.340088") + max_val = float("0.50293") + mean = float("0.0183708") + std = float("0.0399188") + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + min_val = float("0.171143") + max_val = float("0.491455") + mean = float("0.41213") + std = float("0.051015") + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + min_val = float("-1.31836") + max_val = float("1.36426") + mean = float("-0.0202332") + std = float("0.122209") + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768] + dtype = "float32" + min_val = float("0.266602") + max_val = float("2.16602") + mean = float("0.474157") + std = float("0.113208") + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [768] + dtype = "float32" + min_val = float("-0.227051") + max_val = float("0.191406") + mean = float("0.000647112") + std = float("0.0631629") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [3072, 768] + dtype = "float32" + min_val = float("-1.42871") + max_val = float("6.24219") + mean = float("9.09174e-06") + std = float("0.0448247") + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [3072] + dtype = "float32" + min_val = float("-0.449219") + max_val = float("0.574707") + mean = float("-0.104708") + std = float("0.0974598") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.727051") + max_val = float("0.63623") + mean = float("0.0019482") + std = float("0.0582201") + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768] + dtype = "float32" + min_val = float("-0.109436") + max_val = float("0.155518") + mean = float("7.43844e-06") + std = float("0.0291774") + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.586426") + max_val = float("0.4375") + mean = float("1.10467e-05") + std = float("0.0527658") + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [768] + dtype = "float32" + min_val = float("-0.0904541") + max_val = float("0.178589") + mean = float("-0.000592086") + std = float("0.0249144") + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.379639") + max_val = float("0.483398") + mean = float("-0.000203025") + std = float("0.0524548") + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768] + dtype = "float32" + min_val = float("-0.0160217") + max_val = float("0.00654984") + mean = float("-5.22973e-05") + std = float("0.00123598") + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.908691") + max_val = float("0.821289") + mean = float("7.01113e-05") + std = float("0.0774551") + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [768] + dtype = "float32" + min_val = float("-0.941895") + max_val = float("1.03516") + mean = float("-0.00343673") + std = float("0.340425") + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.572266") + max_val = float("0.587891") + mean = float("1.32304e-06") + std = float("0.0773302") + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [768] + dtype = "float32" + min_val = float("-0.281006") + max_val = float("0.467529") + mean = float("0.0220191") + std = float("0.0447399") + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [768] + dtype = "float32" + min_val = float("0.19812") + max_val = float("0.473389") + mean = float("0.401014") + std = float("0.0414225") + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [768] + dtype = "float32" + min_val = float("-2.01367") + max_val = float("1.22168") + mean = float("0.00702611") + std = float("0.141371") + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [768] + dtype = "float32" + min_val = float("0.210938") + max_val = float("1.79785") + mean = float("0.471035") + std = float("0.11155") + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [768] + dtype = "float32" + min_val = float("-0.256592") + max_val = float("0.265381") + mean = float("0.00051839") + std = float("0.0699863") + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.864258") + max_val = float("6.08984") + mean = float("1.55439e-05") + std = float("0.0479069") + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [3072] + dtype = "float32" + min_val = float("-0.397705") + max_val = float("0.609863") + mean = float("-0.102438") + std = float("0.092124") + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.618652") + max_val = float("0.547363") + mean = float("-0.000260851") + std = float("0.0614206") + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [768] + dtype = "float32" + min_val = float("-0.147949") + max_val = float("0.167358") + mean = float("0.000753449") + std = float("0.0484929") + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [768, 768] + dtype = "float32" + min_val = float("-1.68555") + max_val = float("0.493408") + mean = float("-9.99574e-06") + std = float("0.0503106") + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [768] + dtype = "float32" + min_val = float("-0.106934") + max_val = float("0.193115") + mean = float("0.000729642") + std = float("0.0258956") + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.492188") + max_val = float("0.588867") + mean = float("0.000162088") + std = float("0.0484471") + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [768] + dtype = "float32" + min_val = float("-0.00254059") + max_val = float("0.00301743") + mean = float("-8.58749e-06") + std = float("0.000581241") + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.936035") + max_val = float("0.812988") + mean = float("0.00015915") + std = float("0.0781999") + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [768] + dtype = "float32" + min_val = float("-0.995605") + max_val = float("1.25781") + mean = float("-0.0191986") + std = float("0.398188") + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.508789") + max_val = float("0.615723") + mean = float("-0.000705008") + std = float("0.0774705") + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [768] + dtype = "float32" + min_val = float("-0.376953") + max_val = float("0.784668") + mean = float("0.0181663") + std = float("0.0606264") + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [768] + dtype = "float32" + min_val = float("0.13501") + max_val = float("0.467285") + mean = float("0.408541") + std = float("0.0393503") + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [768] + dtype = "float32" + min_val = float("-2.4043") + max_val = float("1.15332") + mean = float("0.0139082") + std = float("0.156624") + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [768] + dtype = "float32" + min_val = float("0.226196") + max_val = float("2.40625") + mean = float("0.489929") + std = float("0.13166") + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [768] + dtype = "float32" + min_val = float("-0.392822") + max_val = float("0.665527") + mean = float("-0.000149415") + std = float("0.0778859") + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [3072, 768] + dtype = "float32" + min_val = float("-1.51758") + max_val = float("6.30078") + mean = float("2.22621e-06") + std = float("0.0454506") + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [3072] + dtype = "float32" + min_val = float("-0.58252") + max_val = float("0.402832") + mean = float("-0.103758") + std = float("0.0886523") + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.598633") + max_val = float("0.502441") + mean = float("-0.000837495") + std = float("0.0597324") + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [768] + dtype = "float32" + min_val = float("-0.202637") + max_val = float("0.200806") + mean = float("-0.000828136") + std = float("0.0597395") + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [768, 768] + dtype = "float32" + min_val = float("-2.21875") + max_val = float("0.460938") + mean = float("-3.24762e-05") + std = float("0.0442046") + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [768] + dtype = "float32" + min_val = float("-0.320801") + max_val = float("0.339355") + mean = float("-0.0021041") + std = float("0.0456797") + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.42749") + max_val = float("0.499268") + mean = float("-0.000116") + std = float("0.0410461") + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [768] + dtype = "float32" + min_val = float("-0.00375557") + max_val = float("0.00421143") + mean = float("-2.58039e-06") + std = float("0.000723744") + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.815918") + max_val = float("0.759766") + mean = float("-3.60192e-05") + std = float("0.0794539") + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [768] + dtype = "float32" + min_val = float("-1.27734") + max_val = float("1.3623") + mean = float("0.00837527") + std = float("0.442717") + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.661621") + max_val = float("0.859375") + mean = float("0.000224048") + std = float("0.0775781") + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [768] + dtype = "float32" + min_val = float("-0.49585") + max_val = float("0.665039") + mean = float("0.00910543") + std = float("0.0552938") + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [768] + dtype = "float32" + min_val = float("0.0596313") + max_val = float("0.497314") + mean = float("0.40279") + std = float("0.0420985") + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [768] + dtype = "float32" + min_val = float("-1.48145") + max_val = float("2.00977") + mean = float("0.0247173") + std = float("0.164284") + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [768] + dtype = "float32" + min_val = float("0.274414") + max_val = float("1.89258") + mean = float("0.48181") + std = float("0.108055") + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [768] + dtype = "float32" + min_val = float("-0.424072") + max_val = float("0.342285") + mean = float("-0.000543796") + std = float("0.0890763") + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [3072, 768] + dtype = "float32" + min_val = float("-10.75") + max_val = float("1.6543") + mean = float("3.3375e-06") + std = float("0.0448999") + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [3072] + dtype = "float32" + min_val = float("-0.648438") + max_val = float("0.394531") + mean = float("-0.115854") + std = float("0.110271") + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [768, 3072] + dtype = "float32" + min_val = float("-1.1084") + max_val = float("1.2793") + mean = float("-0.00122496") + std = float("0.0567686") + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [768] + dtype = "float32" + min_val = float("-0.433838") + max_val = float("0.326904") + mean = float("0.000325684") + std = float("0.0948319") + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.937012") + max_val = float("0.84668") + mean = float("5.27927e-06") + std = float("0.0390663") + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [768] + dtype = "float32" + min_val = float("-0.563965") + max_val = float("0.51123") + mean = float("-0.00231634") + std = float("0.0678634") + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.445801") + max_val = float("0.385498") + mean = float("-1.48992e-05") + std = float("0.036985") + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [768] + dtype = "float32" + min_val = float("-0.0192108") + max_val = float("0.0244904") + mean = float("-0.000172688") + std = float("0.004512") + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.726074") + max_val = float("0.708008") + mean = float("9.19318e-05") + std = float("0.0796358") + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [768] + dtype = "float32" + min_val = float("-1.06348") + max_val = float("1.08789") + mean = float("0.00030978") + std = float("0.337028") + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.79248") + max_val = float("1.35254") + mean = float("2.30758e-05") + std = float("0.0800477") + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [768] + dtype = "float32" + min_val = float("-0.530762") + max_val = float("0.297852") + mean = float("0.00128834") + std = float("0.0702966") + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [768] + dtype = "float32" + min_val = float("0.0639038") + max_val = float("0.825195") + mean = float("0.41862") + std = float("0.060744") + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [1026, 768] + dtype = "float32" + min_val = float("-3.90039") + max_val = float("1.04004") + mean = float("-8.20941e-05") + std = float("0.0304671") + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [50265, 768] + dtype = "float32" + min_val = float("-0.50293") + max_val = float("1.12109") + mean = float("-0.014526") + std = float("0.06534") + data = None diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-base/graph_net.json b/paddle_samples/PaddleNLP/chinese-xlnet-base/graph_net.json new file mode 100644 index 000000000..d1fce9d18 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-base/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "chinese-xlnet-base", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-base/input_meta.py b/paddle_samples/PaddleNLP/chinese-xlnet-base/input_meta.py new file mode 100644 index 000000000..9ea1655e0 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-base/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 9] + dtype = "int64" + data = [19, 11684, 121, 15954, 2090, 21957, 1039, 4, 3] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 9] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 2] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 9] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1] diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-base/model.py b/paddle_samples/PaddleNLP/chinese-xlnet-base/model.py new file mode 100644 index 000000000..f7be5aed5 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-base/model.py @@ -0,0 +1,4369 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + data_0, + data_1, + data_2, + ): + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_1 = paddle._C_ops.transpose(data_0, [1, 0]) + del data_0 + + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_2 = paddle._C_ops.transpose(data_1, [1, 0]) + del data_1 + + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_3 = paddle._C_ops.transpose(data_2, [1, 0]) + del data_2 + + # pd_op.cast: (9x1xf32) <- (9x1xi64) + cast_0 = paddle._C_ops.cast(transpose_3, paddle.float32) + del transpose_3 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (9x1xf32) <- (9x1xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.unsqueeze: (1x9x1xf32) <- (9x1xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.unsqueeze: (1x9x1x1xf32) <- (1x9x1xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.greater_than: (1x9x1x1xb) <- (1x9x1x1xf32, xf32) + greater_than_0 = paddle._C_ops.greater_than(unsqueeze_1, full_1) + del unsqueeze_1 + + # pd_op.cast: (1x9x1x1xf32) <- (1x9x1x1xb) + cast_1 = paddle._C_ops.cast(greater_than_0, paddle.float32) + del greater_than_0 + + # pd_op.full: (9xf32) <- () + full_2 = paddle._C_ops.full( + [9], float("1"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.diag: (9x9xf32) <- (9xf32) + diag_0 = paddle._C_ops.diag(full_2, 0, float("0")) + del full_2 + + # pd_op.scale: (9x9xf32) <- (9x9xf32, 1xf32) + scale_1 = paddle._C_ops.scale(diag_0, full_0, float("0"), True) + del diag_0, full_0 + + # pd_op.cast: (9x9xf32) <- (9x9xf32) + cast_2 = paddle._C_ops.cast(scale_1, paddle.float32) + del scale_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [2, 3] + + # pd_op.unsqueeze: (9x9x1x1xf32) <- (9x9xf32, 2xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(cast_2, full_int_array_2) + del cast_2, full_int_array_2 + + # pd_op.add: (9x9x1x1xf32) <- (1x9x1x1xf32, 9x9x1x1xf32) + add_0 = paddle._C_ops.add(cast_1, unsqueeze_2) + del cast_1, unsqueeze_2 + + # pd_op.greater_than: (9x9x1x1xb) <- (9x9x1x1xf32, xf32) + greater_than_1 = paddle._C_ops.greater_than(add_0, full_1) + del add_0, full_1 + + # pd_op.cast: (9x9x1x1xf32) <- (9x9x1x1xb) + cast_3 = paddle._C_ops.cast(greater_than_1, paddle.float32) + del greater_than_1 + + # pd_op.embedding: (9x1x768xf32) <- (9x1xi64, 32000x768xf32) + embedding_0 = paddle._C_ops.embedding(transpose_1, parameter_204, -1, False) + del parameter_204, transpose_1 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [1] + + # pd_op.unsqueeze: (9x1x1xi64) <- (9x1xi64, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_3) + + # pd_op.unsqueeze: (1x9x1xi64) <- (9x1xi64, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_0) + del full_int_array_0, transpose_2 + + # pd_op.not_equal: (9x9x1xb) <- (9x1x1xi64, 1x9x1xi64) + not_equal_0 = paddle._C_ops.not_equal(unsqueeze_3, unsqueeze_4) + del unsqueeze_3, unsqueeze_4 + + # pd_op.cast: (9x9x1xi64) <- (9x9x1xb) + cast_4 = paddle._C_ops.cast(not_equal_0, paddle.int64) + del not_equal_0 + + # pd_op.full: (1xi32) <- () + full_4 = paddle._C_ops.full( + [1], float("2"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.one_hot: (9x9x1x2xf32) <- (9x9x1xi64, 1xi32) + one_hot_0 = paddle._C_ops.one_hot( + cast_4 % paddle.cast(full_4, cast_4.dtype), full_4 + ) + del cast_4, full_4 + + # pd_op.cast: (9x9x1x2xf32) <- (9x9x1x2xf32) + cast_5 = paddle._C_ops.cast(one_hot_0, paddle.float32) + del one_hot_0 + + # pd_op.full: (1xf64) <- () + full_5 = paddle._C_ops.full( + [1], float("0"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_6 = paddle._C_ops.full( + [1], float("768"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_7 = paddle._C_ops.full( + [1], float("2"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (384xf32) <- (1xf64, 1xf64, 1xf64) + arange_0 = paddle.arange(full_5, full_6, full_7, dtype="float32") + del full_6, full_7 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.00130208"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (384xf32) <- (384xf32, 1xf32) + scale_2 = paddle._C_ops.scale(arange_0, full_8, float("0"), True) + del arange_0, full_8 + + # pd_op.full: (384xf32) <- () + full_9 = paddle._C_ops.full( + [384], + float("10000"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.elementwise_pow: (384xf32) <- (384xf32, 384xf32) + elementwise_pow_0 = paddle._C_ops.elementwise_pow(full_9, scale_2) + del full_9, scale_2 + + # pd_op.full: (384xf32) <- () + full_10 = paddle._C_ops.full( + [384], + float("1"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.divide: (384xf32) <- (384xf32, 384xf32) + divide_0 = paddle._C_ops.divide(full_10, elementwise_pow_0) + del elementwise_pow_0, full_10 + + # pd_op.full: (1xf64) <- () + full_11 = paddle._C_ops.full( + [1], float("9"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_12 = paddle._C_ops.full( + [1], float("-9"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_13 = paddle._C_ops.full( + [1], float("-1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (18xf32) <- (1xf64, 1xf64, 1xf64) + arange_1 = paddle.arange(full_11, full_12, full_13, dtype="float32") + del full_12, full_13 + + # builtin.combine: ([18xf32, 384xf32]) <- (18xf32, 384xf32) + combine_0 = [arange_1, divide_0] + del arange_1, divide_0 + + # pd_op.einsum: (18x384xf32, [0xf32, 0xf32], [18xf32, 384xf32]) <- ([18xf32, 384xf32]) + einsum_0, einsum_1, einsum_2 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_0, "i,d->id"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_0 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_0, + split_1, + ) = einsum_1 + del einsum_1 + + # builtin.split: (18xf32, 384xf32) <- ([18xf32, 384xf32]) + ( + split_2, + split_3, + ) = einsum_2 + del einsum_2 + + # pd_op.sin: (18x384xf32) <- (18x384xf32) + sin_0 = paddle._C_ops.sin(einsum_0) + + # pd_op.cos: (18x384xf32) <- (18x384xf32) + cos_0 = paddle._C_ops.cos(einsum_0) + del einsum_0 + + # pd_op.full: (1xi32) <- () + full_14 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([18x384xf32, 18x384xf32]) <- (18x384xf32, 18x384xf32) + combine_1 = [sin_0, cos_0] + del cos_0, sin_0 + + # pd_op.concat: (18x768xf32) <- ([18x384xf32, 18x384xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_1, full_14) + del combine_1, full_14 + + # pd_op.unsqueeze: (18x1x768xf32) <- (18x768xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(concat_0, full_int_array_3) + del concat_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 1, -1] + + # pd_op.expand: (18x1x768xf32) <- (18x1x768xf32, 3xi64) + expand_0 = paddle._C_ops.expand(unsqueeze_5, full_int_array_4) + del full_int_array_4, unsqueeze_5 + + # pd_op.dropout: (18x1x768xf32, 18x1x768xui8) <- (18x1x768xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + expand_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del expand_0 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_203, False, False) + del parameter_203 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_5 = [9, 1, 12, 64] + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_0, full_int_array_5) + del matmul_0 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_202, False, False) + del parameter_202 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_1, full_int_array_5) + del matmul_1 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_201, False, False) + del parameter_201 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_2, full_int_array_5) + del matmul_2 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(dropout_2, parameter_199, False, False) + del parameter_199 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_6 = [18, -1, 12, 64] + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(matmul_3, full_int_array_6) + del matmul_3 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_1 = paddle._C_ops.add(reshape_0, parameter_196) + del parameter_196 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_2 = [add_1, reshape_1] + del add_1, reshape_1 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_3, einsum_4, einsum_5 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_2, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_2 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_4, + split_5, + ) = einsum_4 + del einsum_4 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_6, + split_7, + ) = einsum_5 + del einsum_5 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_2 = paddle._C_ops.add(reshape_0, parameter_198) + del parameter_198 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_3 = [add_2, reshape_3] + del add_2, reshape_3 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_6, einsum_7, einsum_8 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_3, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_3 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_8, + split_9, + ) = einsum_7 + del einsum_7 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_10, + split_11, + ) = einsum_8 + del einsum_8 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_7 = [1, 12, 18, 9] + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(einsum_6, full_int_array_7) + del einsum_6 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [2147483647] + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + reshape_4, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_4 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, 12, 9, 17] + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(slice_0, full_int_array_9) + del slice_0 + + # pd_op.full: (1xf64) <- () + full_15 = paddle._C_ops.full( + [1], float("1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (9xi64) <- (1xf64, 1xf64, 1xf64) + arange_2 = paddle.arange(full_5, full_11, full_15, dtype="int64") + del full_11, full_15, full_5 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_0 = paddle._C_ops.index_select(reshape_5, arange_2, 3) + del reshape_5 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_3 = paddle._C_ops.add(reshape_0, parameter_197) + del parameter_197, reshape_0 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_4 = [add_3, parameter_195] + del add_3, parameter_195 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_9, einsum_10, einsum_11 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_4, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_4 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_12, + split_13, + ) = einsum_10 + del einsum_10 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_14, + split_15, + ) = einsum_11 + del einsum_11 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_5 = [cast_5, einsum_9] + del einsum_9 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_12, einsum_13, einsum_14 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_5, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_5 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_16, + split_17, + ) = einsum_13 + del einsum_13 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_18, + split_19, + ) = einsum_14 + del einsum_14 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_4 = paddle._C_ops.add(einsum_3, index_select_0) + del einsum_3, index_select_0 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_5 = paddle._C_ops.add(add_4, einsum_12) + del add_4, einsum_12 + + # pd_op.full: (1xf32) <- () + full_16 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_3 = paddle._C_ops.scale(add_5, full_16, float("0"), True) + del add_5 + + # pd_op.transpose: (1x1x9x9xf32) <- (9x9x1x1xf32) + transpose_4 = paddle._C_ops.transpose(cast_3, [2, 3, 0, 1]) + del cast_3 + + # pd_op.full: (1xf32) <- () + full_17 = paddle._C_ops.full( + [1], float("1e+30"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x9x9xf32) <- (1x1x9x9xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_4, full_17, float("0"), True) + del full_17, transpose_4 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_0 = paddle._C_ops.subtract(scale_3, scale_4) + del scale_3 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_0 = paddle._C_ops.softmax(subtract_0, 3) + del subtract_0 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_6 = [dropout_4, reshape_2] + del dropout_4, reshape_2 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_15, einsum_16, einsum_17 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_6, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_6 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_20, + split_21, + ) = einsum_16 + del einsum_16 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_22, + split_23, + ) = einsum_17 + del einsum_17 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_10 = [9, 1, 768] + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_6 = paddle._C_ops.reshape(einsum_15, full_int_array_10) + del einsum_15 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_7 = [reshape_6, parameter_200] + del parameter_200, reshape_6 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_18, einsum_19, einsum_20 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_7, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_7 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_24, + split_25, + ) = einsum_19 + del einsum_19 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_26, + split_27, + ) = einsum_20 + del einsum_20 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_18 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_6 = paddle._C_ops.add(dropout_6, dropout_0) + del dropout_0, dropout_6 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_194, parameter_193, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_6, parameter_193, parameter_194 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_0, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_7 = paddle._C_ops.add(matmul_4, parameter_189) + del matmul_4, parameter_189 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_0 = paddle._C_ops.relu(add_7) + del add_7 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_0 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_5 = paddle._C_ops.matmul(dropout_8, parameter_188, False, False) + del dropout_8, parameter_188 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_8 = paddle._C_ops.add(matmul_5, parameter_187) + del matmul_5, parameter_187 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_8 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_9 = paddle._C_ops.add(dropout_10, layer_norm_0) + del dropout_10, layer_norm_0 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_9, parameter_192, parameter_191, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_9, parameter_191, parameter_192 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_3, parameter_186, False, False) + del parameter_186 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(matmul_6, full_int_array_5) + del matmul_6 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_185, False, False) + del parameter_185 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_7, full_int_array_5) + del matmul_7 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_3, parameter_184, False, False) + del parameter_184 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_8, full_int_array_5) + del matmul_8 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(dropout_2, parameter_182, False, False) + del parameter_182 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_9, full_int_array_6) + del matmul_9 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_10 = paddle._C_ops.add(reshape_7, parameter_179) + del parameter_179 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_8 = [add_10, reshape_8] + del add_10, reshape_8 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_21, einsum_22, einsum_23 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_8, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_8 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_28, + split_29, + ) = einsum_22 + del einsum_22 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_30, + split_31, + ) = einsum_23 + del einsum_23 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_11 = paddle._C_ops.add(reshape_7, parameter_181) + del parameter_181 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_9 = [add_11, reshape_10] + del add_11, reshape_10 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_24, einsum_25, einsum_26 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_9, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_9 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_32, + split_33, + ) = einsum_25 + del einsum_25 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_34, + split_35, + ) = einsum_26 + del einsum_26 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(einsum_24, full_int_array_7) + del einsum_24 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + reshape_11, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_11 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(slice_1, full_int_array_9) + del slice_1 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_1 = paddle._C_ops.index_select(reshape_12, arange_2, 3) + del reshape_12 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_12 = paddle._C_ops.add(reshape_7, parameter_180) + del parameter_180, reshape_7 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_10 = [add_12, parameter_178] + del add_12, parameter_178 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_27, einsum_28, einsum_29 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_10, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_10 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_36, + split_37, + ) = einsum_28 + del einsum_28 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_38, + split_39, + ) = einsum_29 + del einsum_29 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_11 = [cast_5, einsum_27] + del einsum_27 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_30, einsum_31, einsum_32 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_11, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_11 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_40, + split_41, + ) = einsum_31 + del einsum_31 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_42, + split_43, + ) = einsum_32 + del einsum_32 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_13 = paddle._C_ops.add(einsum_21, index_select_1) + del einsum_21, index_select_1 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_14 = paddle._C_ops.add(add_13, einsum_30) + del add_13, einsum_30 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_5 = paddle._C_ops.scale(add_14, full_16, float("0"), True) + del add_14 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_1 = paddle._C_ops.subtract(scale_5, scale_4) + del scale_5 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_1 = paddle._C_ops.softmax(subtract_1, 3) + del subtract_1 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_12 = [dropout_12, reshape_9] + del dropout_12, reshape_9 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_33, einsum_34, einsum_35 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_12, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_12 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_44, + split_45, + ) = einsum_34 + del einsum_34 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_46, + split_47, + ) = einsum_35 + del einsum_35 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_13 = paddle._C_ops.reshape(einsum_33, full_int_array_10) + del einsum_33 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_13 = [reshape_13, parameter_183] + del parameter_183, reshape_13 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_36, einsum_37, einsum_38 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_13, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_13 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_48, + split_49, + ) = einsum_37 + del einsum_37 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_50, + split_51, + ) = einsum_38 + del einsum_38 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_36, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_36 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_15 = paddle._C_ops.add(dropout_14, layer_norm_3) + del dropout_14, layer_norm_3 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_15, parameter_177, parameter_176, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_15, parameter_176, parameter_177 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_16 = paddle._C_ops.add(matmul_10, parameter_172) + del matmul_10, parameter_172 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_1 = paddle._C_ops.relu(add_16) + del add_16 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_1 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_11 = paddle._C_ops.matmul(dropout_16, parameter_171, False, False) + del dropout_16, parameter_171 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_11, parameter_170) + del matmul_11, parameter_170 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_17 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_18 = paddle._C_ops.add(dropout_18, layer_norm_6) + del dropout_18, layer_norm_6 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_175, parameter_174, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18, parameter_174, parameter_175 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_9, parameter_169, False, False) + del parameter_169 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_12, full_int_array_5) + del matmul_12 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_13 = paddle._C_ops.matmul(layer_norm_9, parameter_168, False, False) + del parameter_168 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(matmul_13, full_int_array_5) + del matmul_13 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_9, parameter_167, False, False) + del parameter_167 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_14, full_int_array_5) + del matmul_14 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_15 = paddle._C_ops.matmul(dropout_2, parameter_165, False, False) + del parameter_165 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_15, full_int_array_6) + del matmul_15 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_19 = paddle._C_ops.add(reshape_14, parameter_162) + del parameter_162 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_14 = [add_19, reshape_15] + del add_19, reshape_15 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_39, einsum_40, einsum_41 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_14, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_14 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_52, + split_53, + ) = einsum_40 + del einsum_40 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_54, + split_55, + ) = einsum_41 + del einsum_41 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_20 = paddle._C_ops.add(reshape_14, parameter_164) + del parameter_164 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_15 = [add_20, reshape_17] + del add_20, reshape_17 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_42, einsum_43, einsum_44 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_15, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_15 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_56, + split_57, + ) = einsum_43 + del einsum_43 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_58, + split_59, + ) = einsum_44 + del einsum_44 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(einsum_42, full_int_array_7) + del einsum_42 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + reshape_18, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_18 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_19 = paddle._C_ops.reshape(slice_2, full_int_array_9) + del slice_2 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_2 = paddle._C_ops.index_select(reshape_19, arange_2, 3) + del reshape_19 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_21 = paddle._C_ops.add(reshape_14, parameter_163) + del parameter_163, reshape_14 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_16 = [add_21, parameter_161] + del add_21, parameter_161 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_45, einsum_46, einsum_47 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_16, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_16 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_60, + split_61, + ) = einsum_46 + del einsum_46 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_62, + split_63, + ) = einsum_47 + del einsum_47 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_17 = [cast_5, einsum_45] + del einsum_45 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_48, einsum_49, einsum_50 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_17, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_17 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_64, + split_65, + ) = einsum_49 + del einsum_49 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_66, + split_67, + ) = einsum_50 + del einsum_50 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_22 = paddle._C_ops.add(einsum_39, index_select_2) + del einsum_39, index_select_2 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_23 = paddle._C_ops.add(add_22, einsum_48) + del add_22, einsum_48 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_6 = paddle._C_ops.scale(add_23, full_16, float("0"), True) + del add_23 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_2 = paddle._C_ops.subtract(scale_6, scale_4) + del scale_6 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_2 = paddle._C_ops.softmax(subtract_2, 3) + del subtract_2 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_18 = [dropout_20, reshape_16] + del dropout_20, reshape_16 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_51, einsum_52, einsum_53 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_18, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_18 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_68, + split_69, + ) = einsum_52 + del einsum_52 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_70, + split_71, + ) = einsum_53 + del einsum_53 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_20 = paddle._C_ops.reshape(einsum_51, full_int_array_10) + del einsum_51 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_19 = [reshape_20, parameter_166] + del parameter_166, reshape_20 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_54, einsum_55, einsum_56 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_19, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_19 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_72, + split_73, + ) = einsum_55 + del einsum_55 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_74, + split_75, + ) = einsum_56 + del einsum_56 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_54, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_54 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_24 = paddle._C_ops.add(dropout_22, layer_norm_9) + del dropout_22, layer_norm_9 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_24, parameter_160, parameter_159, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_24, parameter_159, parameter_160 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_12, parameter_156, False, False) + del parameter_156 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_25 = paddle._C_ops.add(matmul_16, parameter_155) + del matmul_16, parameter_155 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_2 = paddle._C_ops.relu(add_25) + del add_25 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_2 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_17 = paddle._C_ops.matmul(dropout_24, parameter_154, False, False) + del dropout_24, parameter_154 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_17, parameter_153) + del matmul_17, parameter_153 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_26, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_26 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_27 = paddle._C_ops.add(dropout_26, layer_norm_12) + del dropout_26, layer_norm_12 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_27, parameter_158, parameter_157, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_27, parameter_157, parameter_158 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_15, parameter_152, False, False) + del parameter_152 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_18, full_int_array_5) + del matmul_18 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_15, parameter_151, False, False) + del parameter_151 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_19, full_int_array_5) + del matmul_19 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_20 = paddle._C_ops.matmul(layer_norm_15, parameter_150, False, False) + del parameter_150 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_23 = paddle._C_ops.reshape(matmul_20, full_int_array_5) + del matmul_20 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_21 = paddle._C_ops.matmul(dropout_2, parameter_148, False, False) + del parameter_148 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_21, full_int_array_6) + del matmul_21 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_28 = paddle._C_ops.add(reshape_21, parameter_145) + del parameter_145 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_20 = [add_28, reshape_22] + del add_28, reshape_22 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_57, einsum_58, einsum_59 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_20, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_20 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_76, + split_77, + ) = einsum_58 + del einsum_58 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_78, + split_79, + ) = einsum_59 + del einsum_59 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_29 = paddle._C_ops.add(reshape_21, parameter_147) + del parameter_147 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_21 = [add_29, reshape_24] + del add_29, reshape_24 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_60, einsum_61, einsum_62 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_21, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_21 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_80, + split_81, + ) = einsum_61 + del einsum_61 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_82, + split_83, + ) = einsum_62 + del einsum_62 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(einsum_60, full_int_array_7) + del einsum_60 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + reshape_25, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_25 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(slice_3, full_int_array_9) + del slice_3 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_3 = paddle._C_ops.index_select(reshape_26, arange_2, 3) + del reshape_26 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_30 = paddle._C_ops.add(reshape_21, parameter_146) + del parameter_146, reshape_21 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_22 = [add_30, parameter_144] + del add_30, parameter_144 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_63, einsum_64, einsum_65 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_22, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_22 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_84, + split_85, + ) = einsum_64 + del einsum_64 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_86, + split_87, + ) = einsum_65 + del einsum_65 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_23 = [cast_5, einsum_63] + del einsum_63 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_66, einsum_67, einsum_68 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_23, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_23 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_88, + split_89, + ) = einsum_67 + del einsum_67 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_90, + split_91, + ) = einsum_68 + del einsum_68 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_31 = paddle._C_ops.add(einsum_57, index_select_3) + del einsum_57, index_select_3 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_32 = paddle._C_ops.add(add_31, einsum_66) + del add_31, einsum_66 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_7 = paddle._C_ops.scale(add_32, full_16, float("0"), True) + del add_32 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_3 = paddle._C_ops.subtract(scale_7, scale_4) + del scale_7 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_3 = paddle._C_ops.softmax(subtract_3, 3) + del subtract_3 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_24 = [dropout_28, reshape_23] + del dropout_28, reshape_23 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_69, einsum_70, einsum_71 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_24, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_24 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_92, + split_93, + ) = einsum_70 + del einsum_70 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_94, + split_95, + ) = einsum_71 + del einsum_71 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(einsum_69, full_int_array_10) + del einsum_69 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_25 = [reshape_27, parameter_149] + del parameter_149, reshape_27 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_72, einsum_73, einsum_74 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_25, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_25 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_96, + split_97, + ) = einsum_73 + del einsum_73 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_98, + split_99, + ) = einsum_74 + del einsum_74 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_72, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_72 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_33 = paddle._C_ops.add(dropout_30, layer_norm_15) + del dropout_30, layer_norm_15 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_33, parameter_143, parameter_142, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_33, parameter_142, parameter_143 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_18, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_34 = paddle._C_ops.add(matmul_22, parameter_138) + del matmul_22, parameter_138 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_3 = paddle._C_ops.relu(add_34) + del add_34 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_3 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_23 = paddle._C_ops.matmul(dropout_32, parameter_137, False, False) + del dropout_32, parameter_137 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_35 = paddle._C_ops.add(matmul_23, parameter_136) + del matmul_23, parameter_136 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_35, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_35 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_36 = paddle._C_ops.add(dropout_34, layer_norm_18) + del dropout_34, layer_norm_18 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_36, parameter_141, parameter_140, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_36, parameter_140, parameter_141 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_21, parameter_135, False, False) + del parameter_135 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_24, full_int_array_5) + del matmul_24 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_21, parameter_134, False, False) + del parameter_134 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_25, full_int_array_5) + del matmul_25 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_21, parameter_133, False, False) + del parameter_133 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_26, full_int_array_5) + del matmul_26 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(dropout_2, parameter_131, False, False) + del parameter_131 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(matmul_27, full_int_array_6) + del matmul_27 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_37 = paddle._C_ops.add(reshape_28, parameter_128) + del parameter_128 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_26 = [add_37, reshape_29] + del add_37, reshape_29 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_75, einsum_76, einsum_77 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_26, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_26 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_100, + split_101, + ) = einsum_76 + del einsum_76 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_102, + split_103, + ) = einsum_77 + del einsum_77 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_38 = paddle._C_ops.add(reshape_28, parameter_130) + del parameter_130 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_27 = [add_38, reshape_31] + del add_38, reshape_31 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_78, einsum_79, einsum_80 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_27, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_27 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_104, + split_105, + ) = einsum_79 + del einsum_79 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_106, + split_107, + ) = einsum_80 + del einsum_80 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(einsum_78, full_int_array_7) + del einsum_78 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + reshape_32, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_32 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(slice_4, full_int_array_9) + del slice_4 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_4 = paddle._C_ops.index_select(reshape_33, arange_2, 3) + del reshape_33 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_39 = paddle._C_ops.add(reshape_28, parameter_129) + del parameter_129, reshape_28 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_28 = [add_39, parameter_127] + del add_39, parameter_127 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_81, einsum_82, einsum_83 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_28, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_28 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_108, + split_109, + ) = einsum_82 + del einsum_82 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_110, + split_111, + ) = einsum_83 + del einsum_83 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_29 = [cast_5, einsum_81] + del einsum_81 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_84, einsum_85, einsum_86 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_29, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_29 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_112, + split_113, + ) = einsum_85 + del einsum_85 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_114, + split_115, + ) = einsum_86 + del einsum_86 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_40 = paddle._C_ops.add(einsum_75, index_select_4) + del einsum_75, index_select_4 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_41 = paddle._C_ops.add(add_40, einsum_84) + del add_40, einsum_84 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_8 = paddle._C_ops.scale(add_41, full_16, float("0"), True) + del add_41 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_4 = paddle._C_ops.subtract(scale_8, scale_4) + del scale_8 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_4 = paddle._C_ops.softmax(subtract_4, 3) + del subtract_4 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_30 = [dropout_36, reshape_30] + del dropout_36, reshape_30 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_87, einsum_88, einsum_89 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_30, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_30 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_116, + split_117, + ) = einsum_88 + del einsum_88 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_118, + split_119, + ) = einsum_89 + del einsum_89 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_34 = paddle._C_ops.reshape(einsum_87, full_int_array_10) + del einsum_87 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_31 = [reshape_34, parameter_132] + del parameter_132, reshape_34 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_90, einsum_91, einsum_92 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_31, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_31 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_120, + split_121, + ) = einsum_91 + del einsum_91 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_122, + split_123, + ) = einsum_92 + del einsum_92 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_90, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_90 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_42 = paddle._C_ops.add(dropout_38, layer_norm_21) + del dropout_38, layer_norm_21 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_126, parameter_125, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_42, parameter_125, parameter_126 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_28 = paddle._C_ops.matmul(layer_norm_24, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_43 = paddle._C_ops.add(matmul_28, parameter_121) + del matmul_28, parameter_121 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_4 = paddle._C_ops.relu(add_43) + del add_43 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_4 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_29 = paddle._C_ops.matmul(dropout_40, parameter_120, False, False) + del dropout_40, parameter_120 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_29, parameter_119) + del matmul_29, parameter_119 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_44, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_44 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_45 = paddle._C_ops.add(dropout_42, layer_norm_24) + del dropout_42, layer_norm_24 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_124, parameter_123, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_45, parameter_123, parameter_124 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_27, parameter_118, False, False) + del parameter_118 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(matmul_30, full_int_array_5) + del matmul_30 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_27, parameter_117, False, False) + del parameter_117 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_31, full_int_array_5) + del matmul_31 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_27, parameter_116, False, False) + del parameter_116 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_32, full_int_array_5) + del matmul_32 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(dropout_2, parameter_114, False, False) + del parameter_114 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_33, full_int_array_6) + del matmul_33 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_46 = paddle._C_ops.add(reshape_35, parameter_111) + del parameter_111 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_32 = [add_46, reshape_36] + del add_46, reshape_36 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_93, einsum_94, einsum_95 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_32, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_32 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_124, + split_125, + ) = einsum_94 + del einsum_94 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_126, + split_127, + ) = einsum_95 + del einsum_95 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_47 = paddle._C_ops.add(reshape_35, parameter_113) + del parameter_113 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_33 = [add_47, reshape_38] + del add_47, reshape_38 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_96, einsum_97, einsum_98 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_33, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_33 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_128, + split_129, + ) = einsum_97 + del einsum_97 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_130, + split_131, + ) = einsum_98 + del einsum_98 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(einsum_96, full_int_array_7) + del einsum_96 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + reshape_39, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_39 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(slice_5, full_int_array_9) + del slice_5 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_5 = paddle._C_ops.index_select(reshape_40, arange_2, 3) + del reshape_40 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_48 = paddle._C_ops.add(reshape_35, parameter_112) + del parameter_112, reshape_35 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_34 = [add_48, parameter_110] + del add_48, parameter_110 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_99, einsum_100, einsum_101 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_34, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_34 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_132, + split_133, + ) = einsum_100 + del einsum_100 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_134, + split_135, + ) = einsum_101 + del einsum_101 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_35 = [cast_5, einsum_99] + del einsum_99 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_102, einsum_103, einsum_104 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_35, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_35 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_136, + split_137, + ) = einsum_103 + del einsum_103 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_138, + split_139, + ) = einsum_104 + del einsum_104 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_49 = paddle._C_ops.add(einsum_93, index_select_5) + del einsum_93, index_select_5 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_50 = paddle._C_ops.add(add_49, einsum_102) + del add_49, einsum_102 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_9 = paddle._C_ops.scale(add_50, full_16, float("0"), True) + del add_50 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_5 = paddle._C_ops.subtract(scale_9, scale_4) + del scale_9 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_5 = paddle._C_ops.softmax(subtract_5, 3) + del subtract_5 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_36 = [dropout_44, reshape_37] + del dropout_44, reshape_37 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_105, einsum_106, einsum_107 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_36, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_36 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_140, + split_141, + ) = einsum_106 + del einsum_106 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_142, + split_143, + ) = einsum_107 + del einsum_107 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(einsum_105, full_int_array_10) + del einsum_105 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_37 = [reshape_41, parameter_115] + del parameter_115, reshape_41 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_108, einsum_109, einsum_110 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_37, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_37 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_144, + split_145, + ) = einsum_109 + del einsum_109 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_146, + split_147, + ) = einsum_110 + del einsum_110 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_108, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_108 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_51 = paddle._C_ops.add(dropout_46, layer_norm_27) + del dropout_46, layer_norm_27 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_51, parameter_109, parameter_108, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_51, parameter_108, parameter_109 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_30, parameter_105, False, False) + del parameter_105 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_52 = paddle._C_ops.add(matmul_34, parameter_104) + del matmul_34, parameter_104 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_5 = paddle._C_ops.relu(add_52) + del add_52 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_5 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_35 = paddle._C_ops.matmul(dropout_48, parameter_103, False, False) + del dropout_48, parameter_103 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_53 = paddle._C_ops.add(matmul_35, parameter_102) + del matmul_35, parameter_102 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_53, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_53 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_54 = paddle._C_ops.add(dropout_50, layer_norm_30) + del dropout_50, layer_norm_30 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_54, parameter_107, parameter_106, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_54, parameter_106, parameter_107 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_33, parameter_101, False, False) + del parameter_101 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_36, full_int_array_5) + del matmul_36 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_37 = paddle._C_ops.matmul(layer_norm_33, parameter_100, False, False) + del parameter_100 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(matmul_37, full_int_array_5) + del matmul_37 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_33, parameter_99, False, False) + del parameter_99 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_38, full_int_array_5) + del matmul_38 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_39 = paddle._C_ops.matmul(dropout_2, parameter_97, False, False) + del parameter_97 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_39, full_int_array_6) + del matmul_39 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_55 = paddle._C_ops.add(reshape_42, parameter_94) + del parameter_94 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_38 = [add_55, reshape_43] + del add_55, reshape_43 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_111, einsum_112, einsum_113 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_38, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_38 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_148, + split_149, + ) = einsum_112 + del einsum_112 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_150, + split_151, + ) = einsum_113 + del einsum_113 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_56 = paddle._C_ops.add(reshape_42, parameter_96) + del parameter_96 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_39 = [add_56, reshape_45] + del add_56, reshape_45 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_114, einsum_115, einsum_116 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_39, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_39 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_152, + split_153, + ) = einsum_115 + del einsum_115 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_154, + split_155, + ) = einsum_116 + del einsum_116 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(einsum_114, full_int_array_7) + del einsum_114 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + reshape_46, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_46 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(slice_6, full_int_array_9) + del slice_6 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_6 = paddle._C_ops.index_select(reshape_47, arange_2, 3) + del reshape_47 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_57 = paddle._C_ops.add(reshape_42, parameter_95) + del parameter_95, reshape_42 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_40 = [add_57, parameter_93] + del add_57, parameter_93 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_117, einsum_118, einsum_119 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_40, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_40 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_156, + split_157, + ) = einsum_118 + del einsum_118 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_158, + split_159, + ) = einsum_119 + del einsum_119 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_41 = [cast_5, einsum_117] + del einsum_117 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_120, einsum_121, einsum_122 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_41, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_41 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_160, + split_161, + ) = einsum_121 + del einsum_121 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_162, + split_163, + ) = einsum_122 + del einsum_122 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_58 = paddle._C_ops.add(einsum_111, index_select_6) + del einsum_111, index_select_6 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_59 = paddle._C_ops.add(add_58, einsum_120) + del add_58, einsum_120 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_10 = paddle._C_ops.scale(add_59, full_16, float("0"), True) + del add_59 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_6 = paddle._C_ops.subtract(scale_10, scale_4) + del scale_10 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_6 = paddle._C_ops.softmax(subtract_6, 3) + del subtract_6 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_42 = [dropout_52, reshape_44] + del dropout_52, reshape_44 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_123, einsum_124, einsum_125 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_42, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_42 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_164, + split_165, + ) = einsum_124 + del einsum_124 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_166, + split_167, + ) = einsum_125 + del einsum_125 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(einsum_123, full_int_array_10) + del einsum_123 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_43 = [reshape_48, parameter_98] + del parameter_98, reshape_48 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_126, einsum_127, einsum_128 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_43, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_43 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_168, + split_169, + ) = einsum_127 + del einsum_127 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_170, + split_171, + ) = einsum_128 + del einsum_128 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_126, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_126 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_60 = paddle._C_ops.add(dropout_54, layer_norm_33) + del dropout_54, layer_norm_33 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_60, parameter_92, parameter_91, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_60, parameter_91, parameter_92 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_36, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_61 = paddle._C_ops.add(matmul_40, parameter_87) + del matmul_40, parameter_87 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_6 = paddle._C_ops.relu(add_61) + del add_61 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_6 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_41 = paddle._C_ops.matmul(dropout_56, parameter_86, False, False) + del dropout_56, parameter_86 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_62 = paddle._C_ops.add(matmul_41, parameter_85) + del matmul_41, parameter_85 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_62, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_62 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_63 = paddle._C_ops.add(dropout_58, layer_norm_36) + del dropout_58, layer_norm_36 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_63, parameter_90, parameter_89, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_63, parameter_89, parameter_90 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_39, parameter_84, False, False) + del parameter_84 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(matmul_42, full_int_array_5) + del matmul_42 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_39, parameter_83, False, False) + del parameter_83 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(matmul_43, full_int_array_5) + del matmul_43 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_44 = paddle._C_ops.matmul(layer_norm_39, parameter_82, False, False) + del parameter_82 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_51 = paddle._C_ops.reshape(matmul_44, full_int_array_5) + del matmul_44 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_45 = paddle._C_ops.matmul(dropout_2, parameter_80, False, False) + del parameter_80 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(matmul_45, full_int_array_6) + del matmul_45 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_64 = paddle._C_ops.add(reshape_49, parameter_77) + del parameter_77 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_44 = [add_64, reshape_50] + del add_64, reshape_50 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_129, einsum_130, einsum_131 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_44, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_44 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_172, + split_173, + ) = einsum_130 + del einsum_130 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_174, + split_175, + ) = einsum_131 + del einsum_131 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_65 = paddle._C_ops.add(reshape_49, parameter_79) + del parameter_79 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_45 = [add_65, reshape_52] + del add_65, reshape_52 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_132, einsum_133, einsum_134 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_45, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_45 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_176, + split_177, + ) = einsum_133 + del einsum_133 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_178, + split_179, + ) = einsum_134 + del einsum_134 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(einsum_132, full_int_array_7) + del einsum_132 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + reshape_53, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_53 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(slice_7, full_int_array_9) + del slice_7 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_7 = paddle._C_ops.index_select(reshape_54, arange_2, 3) + del reshape_54 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_66 = paddle._C_ops.add(reshape_49, parameter_78) + del parameter_78, reshape_49 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_46 = [add_66, parameter_76] + del add_66, parameter_76 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_135, einsum_136, einsum_137 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_46, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_46 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_180, + split_181, + ) = einsum_136 + del einsum_136 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_182, + split_183, + ) = einsum_137 + del einsum_137 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_47 = [cast_5, einsum_135] + del einsum_135 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_138, einsum_139, einsum_140 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_47, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_47 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_184, + split_185, + ) = einsum_139 + del einsum_139 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_186, + split_187, + ) = einsum_140 + del einsum_140 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_67 = paddle._C_ops.add(einsum_129, index_select_7) + del einsum_129, index_select_7 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_68 = paddle._C_ops.add(add_67, einsum_138) + del add_67, einsum_138 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_11 = paddle._C_ops.scale(add_68, full_16, float("0"), True) + del add_68 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_7 = paddle._C_ops.subtract(scale_11, scale_4) + del scale_11 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_7 = paddle._C_ops.softmax(subtract_7, 3) + del subtract_7 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_48 = [dropout_60, reshape_51] + del dropout_60, reshape_51 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_141, einsum_142, einsum_143 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_48, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_48 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_188, + split_189, + ) = einsum_142 + del einsum_142 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_190, + split_191, + ) = einsum_143 + del einsum_143 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(einsum_141, full_int_array_10) + del einsum_141 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_49 = [reshape_55, parameter_81] + del parameter_81, reshape_55 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_144, einsum_145, einsum_146 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_49, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_49 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_192, + split_193, + ) = einsum_145 + del einsum_145 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_194, + split_195, + ) = einsum_146 + del einsum_146 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_144, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_144 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_69 = paddle._C_ops.add(dropout_62, layer_norm_39) + del dropout_62, layer_norm_39 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_69, parameter_75, parameter_74, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_69, parameter_74, parameter_75 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_42, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_70 = paddle._C_ops.add(matmul_46, parameter_70) + del matmul_46, parameter_70 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_7 = paddle._C_ops.relu(add_70) + del add_70 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_7 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_47 = paddle._C_ops.matmul(dropout_64, parameter_69, False, False) + del dropout_64, parameter_69 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_71 = paddle._C_ops.add(matmul_47, parameter_68) + del matmul_47, parameter_68 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_71, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_71 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_72 = paddle._C_ops.add(dropout_66, layer_norm_42) + del dropout_66, layer_norm_42 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_72, parameter_73, parameter_72, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_72, parameter_72, parameter_73 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_45, parameter_67, False, False) + del parameter_67 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(matmul_48, full_int_array_5) + del matmul_48 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_45, parameter_66, False, False) + del parameter_66 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(matmul_49, full_int_array_5) + del matmul_49 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_45, parameter_65, False, False) + del parameter_65 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(matmul_50, full_int_array_5) + del matmul_50 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(dropout_2, parameter_63, False, False) + del parameter_63 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_59 = paddle._C_ops.reshape(matmul_51, full_int_array_6) + del matmul_51 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_73 = paddle._C_ops.add(reshape_56, parameter_60) + del parameter_60 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_50 = [add_73, reshape_57] + del add_73, reshape_57 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_147, einsum_148, einsum_149 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_50, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_50 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_196, + split_197, + ) = einsum_148 + del einsum_148 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_198, + split_199, + ) = einsum_149 + del einsum_149 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_74 = paddle._C_ops.add(reshape_56, parameter_62) + del parameter_62 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_51 = [add_74, reshape_59] + del add_74, reshape_59 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_150, einsum_151, einsum_152 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_51, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_51 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_200, + split_201, + ) = einsum_151 + del einsum_151 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_202, + split_203, + ) = einsum_152 + del einsum_152 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(einsum_150, full_int_array_7) + del einsum_150 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + reshape_60, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_60 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(slice_8, full_int_array_9) + del slice_8 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_8 = paddle._C_ops.index_select(reshape_61, arange_2, 3) + del reshape_61 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_75 = paddle._C_ops.add(reshape_56, parameter_61) + del parameter_61, reshape_56 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_52 = [add_75, parameter_59] + del add_75, parameter_59 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_153, einsum_154, einsum_155 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_52, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_52 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_204, + split_205, + ) = einsum_154 + del einsum_154 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_206, + split_207, + ) = einsum_155 + del einsum_155 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_53 = [cast_5, einsum_153] + del einsum_153 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_156, einsum_157, einsum_158 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_53, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_53 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_208, + split_209, + ) = einsum_157 + del einsum_157 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_210, + split_211, + ) = einsum_158 + del einsum_158 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_76 = paddle._C_ops.add(einsum_147, index_select_8) + del einsum_147, index_select_8 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_77 = paddle._C_ops.add(add_76, einsum_156) + del add_76, einsum_156 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_12 = paddle._C_ops.scale(add_77, full_16, float("0"), True) + del add_77 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_8 = paddle._C_ops.subtract(scale_12, scale_4) + del scale_12 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_8 = paddle._C_ops.softmax(subtract_8, 3) + del subtract_8 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_54 = [dropout_68, reshape_58] + del dropout_68, reshape_58 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_159, einsum_160, einsum_161 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_54, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_54 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_212, + split_213, + ) = einsum_160 + del einsum_160 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_214, + split_215, + ) = einsum_161 + del einsum_161 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_62 = paddle._C_ops.reshape(einsum_159, full_int_array_10) + del einsum_159 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_55 = [reshape_62, parameter_64] + del parameter_64, reshape_62 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_162, einsum_163, einsum_164 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_55, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_55 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_216, + split_217, + ) = einsum_163 + del einsum_163 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_218, + split_219, + ) = einsum_164 + del einsum_164 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_162, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_162 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_78 = paddle._C_ops.add(dropout_70, layer_norm_45) + del dropout_70, layer_norm_45 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_78, parameter_58, parameter_57, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_78, parameter_57, parameter_58 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_52 = paddle._C_ops.matmul(layer_norm_48, parameter_54, False, False) + del parameter_54 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_79 = paddle._C_ops.add(matmul_52, parameter_53) + del matmul_52, parameter_53 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_8 = paddle._C_ops.relu(add_79) + del add_79 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_8 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_53 = paddle._C_ops.matmul(dropout_72, parameter_52, False, False) + del dropout_72, parameter_52 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_53, parameter_51) + del matmul_53, parameter_51 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_80, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_80 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_81 = paddle._C_ops.add(dropout_74, layer_norm_48) + del dropout_74, layer_norm_48 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_56, parameter_55, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_81, parameter_55, parameter_56 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_51, parameter_50, False, False) + del parameter_50 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(matmul_54, full_int_array_5) + del matmul_54 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_51, parameter_49, False, False) + del parameter_49 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(matmul_55, full_int_array_5) + del matmul_55 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_51, parameter_48, False, False) + del parameter_48 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(matmul_56, full_int_array_5) + del matmul_56 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(dropout_2, parameter_46, False, False) + del parameter_46 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(matmul_57, full_int_array_6) + del matmul_57 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_82 = paddle._C_ops.add(reshape_63, parameter_43) + del parameter_43 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_56 = [add_82, reshape_64] + del add_82, reshape_64 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_165, einsum_166, einsum_167 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_56, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_56 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_220, + split_221, + ) = einsum_166 + del einsum_166 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_222, + split_223, + ) = einsum_167 + del einsum_167 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_83 = paddle._C_ops.add(reshape_63, parameter_45) + del parameter_45 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_57 = [add_83, reshape_66] + del add_83, reshape_66 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_168, einsum_169, einsum_170 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_57, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_57 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_224, + split_225, + ) = einsum_169 + del einsum_169 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_226, + split_227, + ) = einsum_170 + del einsum_170 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(einsum_168, full_int_array_7) + del einsum_168 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + reshape_67, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_67 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(slice_9, full_int_array_9) + del slice_9 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_9 = paddle._C_ops.index_select(reshape_68, arange_2, 3) + del reshape_68 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_84 = paddle._C_ops.add(reshape_63, parameter_44) + del parameter_44, reshape_63 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_58 = [add_84, parameter_42] + del add_84, parameter_42 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_171, einsum_172, einsum_173 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_58, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_58 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_228, + split_229, + ) = einsum_172 + del einsum_172 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_230, + split_231, + ) = einsum_173 + del einsum_173 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_59 = [cast_5, einsum_171] + del einsum_171 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_174, einsum_175, einsum_176 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_59, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_59 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_232, + split_233, + ) = einsum_175 + del einsum_175 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_234, + split_235, + ) = einsum_176 + del einsum_176 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_85 = paddle._C_ops.add(einsum_165, index_select_9) + del einsum_165, index_select_9 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_86 = paddle._C_ops.add(add_85, einsum_174) + del add_85, einsum_174 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_13 = paddle._C_ops.scale(add_86, full_16, float("0"), True) + del add_86 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_9 = paddle._C_ops.subtract(scale_13, scale_4) + del scale_13 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_9 = paddle._C_ops.softmax(subtract_9, 3) + del subtract_9 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_60 = [dropout_76, reshape_65] + del dropout_76, reshape_65 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_177, einsum_178, einsum_179 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_60, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_60 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_236, + split_237, + ) = einsum_178 + del einsum_178 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_238, + split_239, + ) = einsum_179 + del einsum_179 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(einsum_177, full_int_array_10) + del einsum_177 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_61 = [reshape_69, parameter_47] + del parameter_47, reshape_69 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_180, einsum_181, einsum_182 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_61, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_61 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_240, + split_241, + ) = einsum_181 + del einsum_181 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_242, + split_243, + ) = einsum_182 + del einsum_182 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_180, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_180 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_87 = paddle._C_ops.add(dropout_78, layer_norm_51) + del dropout_78, layer_norm_51 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_41, parameter_40, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_87, parameter_40, parameter_41 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_54, parameter_37, False, False) + del parameter_37 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_88 = paddle._C_ops.add(matmul_58, parameter_36) + del matmul_58, parameter_36 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_9 = paddle._C_ops.relu(add_88) + del add_88 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_9 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_59 = paddle._C_ops.matmul(dropout_80, parameter_35, False, False) + del dropout_80, parameter_35 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_89 = paddle._C_ops.add(matmul_59, parameter_34) + del matmul_59, parameter_34 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_89, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_89 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_90 = paddle._C_ops.add(dropout_82, layer_norm_54) + del dropout_82, layer_norm_54 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_39, parameter_38, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_90, parameter_38, parameter_39 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_57, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(matmul_60, full_int_array_5) + del matmul_60 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_61 = paddle._C_ops.matmul(layer_norm_57, parameter_32, False, False) + del parameter_32 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(matmul_61, full_int_array_5) + del matmul_61 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_57, parameter_31, False, False) + del parameter_31 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_72 = paddle._C_ops.reshape(matmul_62, full_int_array_5) + del matmul_62 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_63 = paddle._C_ops.matmul(dropout_2, parameter_29, False, False) + del parameter_29 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(matmul_63, full_int_array_6) + del matmul_63 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_91 = paddle._C_ops.add(reshape_70, parameter_26) + del parameter_26 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_62 = [add_91, reshape_71] + del add_91, reshape_71 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_183, einsum_184, einsum_185 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_62, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_62 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_244, + split_245, + ) = einsum_184 + del einsum_184 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_246, + split_247, + ) = einsum_185 + del einsum_185 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_92 = paddle._C_ops.add(reshape_70, parameter_28) + del parameter_28 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_63 = [add_92, reshape_73] + del add_92, reshape_73 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_186, einsum_187, einsum_188 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_63, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_63 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_248, + split_249, + ) = einsum_187 + del einsum_187 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_250, + split_251, + ) = einsum_188 + del einsum_188 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(einsum_186, full_int_array_7) + del einsum_186 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + reshape_74, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_74 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(slice_10, full_int_array_9) + del slice_10 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_10 = paddle._C_ops.index_select(reshape_75, arange_2, 3) + del reshape_75 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_93 = paddle._C_ops.add(reshape_70, parameter_27) + del parameter_27, reshape_70 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_64 = [add_93, parameter_25] + del add_93, parameter_25 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_189, einsum_190, einsum_191 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_64, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_64 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_252, + split_253, + ) = einsum_190 + del einsum_190 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_254, + split_255, + ) = einsum_191 + del einsum_191 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_65 = [cast_5, einsum_189] + del einsum_189 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_192, einsum_193, einsum_194 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_65, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_65 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_256, + split_257, + ) = einsum_193 + del einsum_193 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_258, + split_259, + ) = einsum_194 + del einsum_194 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_94 = paddle._C_ops.add(einsum_183, index_select_10) + del einsum_183, index_select_10 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_95 = paddle._C_ops.add(add_94, einsum_192) + del add_94, einsum_192 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_14 = paddle._C_ops.scale(add_95, full_16, float("0"), True) + del add_95 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_10 = paddle._C_ops.subtract(scale_14, scale_4) + del scale_14 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_10 = paddle._C_ops.softmax(subtract_10, 3) + del subtract_10 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_66 = [dropout_84, reshape_72] + del dropout_84, reshape_72 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_195, einsum_196, einsum_197 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_66, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_66 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_260, + split_261, + ) = einsum_196 + del einsum_196 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_262, + split_263, + ) = einsum_197 + del einsum_197 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(einsum_195, full_int_array_10) + del einsum_195 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_67 = [reshape_76, parameter_30] + del parameter_30, reshape_76 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_198, einsum_199, einsum_200 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_67, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_67 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_264, + split_265, + ) = einsum_199 + del einsum_199 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_266, + split_267, + ) = einsum_200 + del einsum_200 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_198, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_198 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_96 = paddle._C_ops.add(dropout_86, layer_norm_57) + del dropout_86, layer_norm_57 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_96, parameter_24, parameter_23, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_96, parameter_23, parameter_24 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_60, parameter_20, False, False) + del parameter_20 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_97 = paddle._C_ops.add(matmul_64, parameter_19) + del matmul_64, parameter_19 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_10 = paddle._C_ops.relu(add_97) + del add_97 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_10 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_65 = paddle._C_ops.matmul(dropout_88, parameter_18, False, False) + del dropout_88, parameter_18 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_98 = paddle._C_ops.add(matmul_65, parameter_17) + del matmul_65, parameter_17 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_98, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_98 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_99 = paddle._C_ops.add(dropout_90, layer_norm_60) + del dropout_90, layer_norm_60 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_99, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_99, parameter_21, parameter_22 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_63, parameter_16, False, False) + del parameter_16 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(matmul_66, full_int_array_5) + del matmul_66 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_63, parameter_15, False, False) + del parameter_15 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(matmul_67, full_int_array_5) + del matmul_67 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_68 = paddle._C_ops.matmul(layer_norm_63, parameter_14, False, False) + del parameter_14 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(matmul_68, full_int_array_5) + del full_int_array_5, matmul_68 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(dropout_2, parameter_12, False, False) + del dropout_2, parameter_12 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_80 = paddle._C_ops.reshape(matmul_69, full_int_array_6) + del full_int_array_6, matmul_69 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_100 = paddle._C_ops.add(reshape_77, parameter_9) + del parameter_9 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_68 = [add_100, reshape_78] + del add_100, reshape_78 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_201, einsum_202, einsum_203 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_68, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_68 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_268, + split_269, + ) = einsum_202 + del einsum_202 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_270, + split_271, + ) = einsum_203 + del einsum_203 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_101 = paddle._C_ops.add(reshape_77, parameter_11) + del parameter_11 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_69 = [add_101, reshape_80] + del add_101, reshape_80 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_204, einsum_205, einsum_206 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_69, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_69 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_272, + split_273, + ) = einsum_205 + del einsum_205 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_274, + split_275, + ) = einsum_206 + del einsum_206 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(einsum_204, full_int_array_7) + del einsum_204, full_int_array_7 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + reshape_81, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del full_int_array_3, full_int_array_8, reshape_81 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(slice_11, full_int_array_9) + del full_int_array_9, slice_11 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_11 = paddle._C_ops.index_select(reshape_82, arange_2, 3) + del arange_2, reshape_82 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_102 = paddle._C_ops.add(reshape_77, parameter_10) + del parameter_10, reshape_77 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_70 = [add_102, parameter_8] + del add_102, parameter_8 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_207, einsum_208, einsum_209 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_70, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_70 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_276, + split_277, + ) = einsum_208 + del einsum_208 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_278, + split_279, + ) = einsum_209 + del einsum_209 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_71 = [cast_5, einsum_207] + del cast_5, einsum_207 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_210, einsum_211, einsum_212 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_71, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_71 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_280, + split_281, + ) = einsum_211 + del einsum_211 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_282, + split_283, + ) = einsum_212 + del einsum_212 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_103 = paddle._C_ops.add(einsum_201, index_select_11) + del einsum_201, index_select_11 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_104 = paddle._C_ops.add(add_103, einsum_210) + del add_103, einsum_210 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_15 = paddle._C_ops.scale(add_104, full_16, float("0"), True) + del add_104, full_16 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_11 = paddle._C_ops.subtract(scale_15, scale_4) + del scale_15, scale_4 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_11 = paddle._C_ops.softmax(subtract_11, 3) + del subtract_11 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_72 = [dropout_92, reshape_79] + del dropout_92, reshape_79 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_213, einsum_214, einsum_215 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_72, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_72 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_284, + split_285, + ) = einsum_214 + del einsum_214 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_286, + split_287, + ) = einsum_215 + del einsum_215 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(einsum_213, full_int_array_10) + del einsum_213, full_int_array_10 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_73 = [reshape_83, parameter_13] + del parameter_13, reshape_83 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_216, einsum_217, einsum_218 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_73, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_73 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_288, + split_289, + ) = einsum_217 + del einsum_217 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_290, + split_291, + ) = einsum_218 + del einsum_218 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_216, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_216 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_105 = paddle._C_ops.add(dropout_94, layer_norm_63) + del dropout_94, layer_norm_63 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_105, parameter_7, parameter_6, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_105, parameter_6, parameter_7 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_66, parameter_3, False, False) + del parameter_3 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_106 = paddle._C_ops.add(matmul_70, parameter_2) + del matmul_70, parameter_2 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_11 = paddle._C_ops.relu(add_106) + del add_106 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_11 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(dropout_96, parameter_1, False, False) + del dropout_96, parameter_1 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_107 = paddle._C_ops.add(matmul_71, parameter_0) + del matmul_71, parameter_0 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_107, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_107 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_108 = paddle._C_ops.add(dropout_98, layer_norm_66) + del dropout_98, layer_norm_66 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_108, parameter_5, parameter_4, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_108, parameter_4, parameter_5 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_69, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_3, layer_norm_69 + + # pd_op.transpose: (1x9x768xf32) <- (9x1x768xf32) + transpose_0 = paddle._C_ops.transpose(dropout_100, [1, 0, 2]) + del dropout_100 + + return transpose_0 diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-base/weight_meta.py b/paddle_samples/PaddleNLP/chinese-xlnet-base/weight_meta.py new file mode 100644 index 000000000..5dbd9da9b --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-base/weight_meta.py @@ -0,0 +1,2048 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0998714") + max_val = float("0.0984303") + mean = float("-3.03961e-06") + std = float("0.0200015") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101221") + max_val = float("0.0987751") + mean = float("1.26591e-05") + std = float("0.0200075") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.056897") + max_val = float("0.0570167") + mean = float("-0.00052722") + std = float("0.0200559") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0533698") + max_val = float("0.0668244") + mean = float("0.00017133") + std = float("0.0196393") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0652753") + max_val = float("0.0590219") + mean = float("0.000199135") + std = float("0.0198334") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0638812") + max_val = float("0.0620904") + mean = float("-0.00144631") + std = float("0.0201184") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0939125") + max_val = float("0.0880784") + mean = float("-6.9476e-06") + std = float("0.0200321") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0923562") + max_val = float("0.0888936") + mean = float("-2.9202e-06") + std = float("0.0199792") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101688") + max_val = float("0.0916663") + mean = float("-2.96884e-05") + std = float("0.0199948") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0873907") + max_val = float("0.104748") + mean = float("3.4997e-05") + std = float("0.020015") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.092884") + max_val = float("0.0915055") + mean = float("1.42978e-05") + std = float("0.0199916") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0989158") + max_val = float("0.0984304") + mean = float("-4.01166e-06") + std = float("0.0199911") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0962958") + max_val = float("0.105661") + mean = float("1.3769e-06") + std = float("0.02") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.063664") + max_val = float("0.0613667") + mean = float("0.000547421") + std = float("0.0200497") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0671897") + max_val = float("0.0783652") + mean = float("0.000622001") + std = float("0.0202513") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0725683") + max_val = float("0.0650216") + mean = float("-0.000784367") + std = float("0.0203172") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0685982") + max_val = float("0.0590483") + mean = float("-0.000134032") + std = float("0.020622") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0935638") + max_val = float("0.0981755") + mean = float("-1.56042e-05") + std = float("0.0200175") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0918844") + max_val = float("0.0967983") + mean = float("5.58553e-05") + std = float("0.0200067") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0910037") + max_val = float("0.104207") + mean = float("6.23767e-06") + std = float("0.0199933") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0933517") + max_val = float("0.0979902") + mean = float("2.63145e-05") + std = float("0.0200088") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0989605") + max_val = float("0.100781") + mean = float("-5.33705e-05") + std = float("0.020009") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.10758") + max_val = float("0.10164") + mean = float("-1.782e-05") + std = float("0.0199951") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101564") + max_val = float("0.105164") + mean = float("3.94919e-06") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0743405") + max_val = float("0.0580922") + mean = float("-0.000553093") + std = float("0.0199521") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0554053") + max_val = float("0.056885") + mean = float("0.00102951") + std = float("0.0197574") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0579967") + max_val = float("0.080138") + mean = float("0.000286156") + std = float("0.0195506") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0635722") + max_val = float("0.054928") + mean = float("-0.00117185") + std = float("0.0204126") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.097754") + max_val = float("0.100356") + mean = float("1.25839e-05") + std = float("0.0200035") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0978946") + max_val = float("0.0969981") + mean = float("3.99889e-06") + std = float("0.0200023") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0894704") + max_val = float("0.0943186") + mean = float("-1.14581e-05") + std = float("0.0200001") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0902368") + max_val = float("0.0955309") + mean = float("-1.5804e-06") + std = float("0.0200062") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0894124") + max_val = float("0.0937951") + mean = float("-6.2392e-05") + std = float("0.0200184") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0979337") + max_val = float("0.0947495") + mean = float("9.3209e-06") + std = float("0.0199896") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0956268") + max_val = float("0.102042") + mean = float("-3.11307e-06") + std = float("0.0199991") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0600446") + max_val = float("0.0551249") + mean = float("0.000668725") + std = float("0.0200145") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0603542") + max_val = float("0.0639203") + mean = float("0.000876608") + std = float("0.0203053") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0648046") + max_val = float("0.0670915") + mean = float("5.73312e-05") + std = float("0.0199847") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0718373") + max_val = float("0.0617058") + mean = float("9.69534e-05") + std = float("0.0200235") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0998177") + max_val = float("0.100939") + mean = float("-2.60804e-05") + std = float("0.0200047") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0948367") + max_val = float("0.0902295") + mean = float("5.64852e-05") + std = float("0.0200064") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0975177") + max_val = float("0.0945639") + mean = float("3.33007e-05") + std = float("0.0200165") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0932207") + max_val = float("0.0939244") + mean = float("-3.06693e-05") + std = float("0.0199697") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0910764") + max_val = float("0.0979006") + mean = float("3.5443e-05") + std = float("0.0199895") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.10246") + max_val = float("0.102432") + mean = float("2.51644e-05") + std = float("0.0200064") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0979288") + max_val = float("0.110267") + mean = float("-1.96634e-06") + std = float("0.0200013") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0639689") + max_val = float("0.0547493") + mean = float("-0.0010852") + std = float("0.0195674") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0639137") + max_val = float("0.0712681") + mean = float("0.000938638") + std = float("0.0206526") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0617112") + max_val = float("0.0581551") + mean = float("-0.000829282") + std = float("0.0186566") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0720919") + max_val = float("0.0569111") + mean = float("0.000933793") + std = float("0.0197778") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0901267") + max_val = float("0.0963059") + mean = float("-3.99305e-05") + std = float("0.0200038") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0919104") + max_val = float("0.0971055") + mean = float("2.00371e-05") + std = float("0.020012") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0979838") + max_val = float("0.0884718") + mean = float("-3.46436e-05") + std = float("0.0199821") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.10039") + max_val = float("0.0876718") + mean = float("-1.1278e-05") + std = float("0.0199789") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0914442") + max_val = float("0.111778") + mean = float("-2.99018e-05") + std = float("0.0200296") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0993673") + max_val = float("0.0954033") + mean = float("2.0312e-05") + std = float("0.0200069") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.109231") + max_val = float("0.104778") + mean = float("-1.63055e-06") + std = float("0.0199936") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0646925") + max_val = float("0.0661256") + mean = float("8.01721e-05") + std = float("0.0203713") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0627554") + max_val = float("0.0622741") + mean = float("0.000668161") + std = float("0.0203228") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0649528") + max_val = float("0.0575164") + mean = float("-0.00103082") + std = float("0.0206422") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0618572") + max_val = float("0.0741325") + mean = float("4.17254e-05") + std = float("0.0201355") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0940977") + max_val = float("0.0906086") + mean = float("2.22085e-05") + std = float("0.0199948") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0898018") + max_val = float("0.0921712") + mean = float("-7.91858e-06") + std = float("0.0200158") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0930985") + max_val = float("0.0904702") + mean = float("1.22401e-05") + std = float("0.0200171") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916543") + max_val = float("0.0928527") + mean = float("-2.07254e-05") + std = float("0.0200222") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.091229") + max_val = float("0.0979338") + mean = float("-2.48638e-05") + std = float("0.0199802") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0977989") + max_val = float("0.0964952") + mean = float("-1.39183e-05") + std = float("0.0200033") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0993583") + max_val = float("0.0992722") + mean = float("-5.68789e-06") + std = float("0.0200234") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0626282") + max_val = float("0.0639193") + mean = float("-0.000134828") + std = float("0.0202296") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0610922") + max_val = float("0.08835") + mean = float("0.000520116") + std = float("0.0209154") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0630409") + max_val = float("0.0590075") + mean = float("0.000529107") + std = float("0.0189061") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0639899") + max_val = float("0.0549799") + mean = float("-0.000347124") + std = float("0.0196525") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0921856") + max_val = float("0.106383") + mean = float("-1.21788e-05") + std = float("0.0200174") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0951978") + max_val = float("0.0910537") + mean = float("-1.91198e-05") + std = float("0.0199829") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0934649") + max_val = float("0.0918414") + mean = float("2.66845e-05") + std = float("0.0200047") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0982122") + max_val = float("0.0951821") + mean = float("-1.94681e-05") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100677") + max_val = float("0.095596") + mean = float("4.64555e-05") + std = float("0.0200081") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.100304") + max_val = float("0.0992358") + mean = float("-4.44139e-06") + std = float("0.0200026") + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0976562") + max_val = float("0.0991958") + mean = float("-2.74978e-05") + std = float("0.0199844") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.066919") + max_val = float("0.0784602") + mean = float("0.000554993") + std = float("0.0202607") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0579942") + max_val = float("0.0650711") + mean = float("-0.000921763") + std = float("0.0193981") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0578699") + max_val = float("0.0777063") + mean = float("0.000276614") + std = float("0.0196959") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0910947") + max_val = float("0.0641465") + mean = float("-0.000487076") + std = float("0.0202169") + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0952865") + max_val = float("0.0953403") + mean = float("-3.82647e-05") + std = float("0.0199912") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0938449") + max_val = float("0.0938034") + mean = float("-5.93409e-06") + std = float("0.0199985") + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0966507") + max_val = float("0.101359") + mean = float("-4.50886e-05") + std = float("0.0200011") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0974317") + max_val = float("0.088512") + mean = float("-1.53379e-05") + std = float("0.0199917") + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0947583") + max_val = float("0.097753") + mean = float("-3.77429e-05") + std = float("0.0199982") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0966239") + max_val = float("0.0946495") + mean = float("7.38891e-06") + std = float("0.0200077") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0990434") + max_val = float("0.0986182") + mean = float("1.60806e-06") + std = float("0.020012") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0660286") + max_val = float("0.0705801") + mean = float("-0.000426975") + std = float("0.0202835") + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0546884") + max_val = float("0.0597706") + mean = float("8.27615e-05") + std = float("0.01934") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0521603") + max_val = float("0.0543209") + mean = float("0.000320002") + std = float("0.0189905") + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.051644") + max_val = float("0.0690596") + mean = float("-5.32648e-05") + std = float("0.0192358") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101252") + max_val = float("0.0949966") + mean = float("-1.39793e-05") + std = float("0.0200079") + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0977555") + max_val = float("0.0929576") + mean = float("-2.29729e-06") + std = float("0.0199824") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0889582") + max_val = float("0.0956246") + mean = float("-6.87214e-06") + std = float("0.0200277") + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0905439") + max_val = float("0.0928013") + mean = float("-1.9406e-05") + std = float("0.0200045") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0915919") + max_val = float("0.0916602") + mean = float("2.62146e-05") + std = float("0.0200038") + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0979289") + max_val = float("0.0973227") + mean = float("-1.06642e-05") + std = float("0.0200054") + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0990122") + max_val = float("0.103254") + mean = float("-2.12928e-05") + std = float("0.0199923") + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.059159") + max_val = float("0.0750411") + mean = float("0.00127293") + std = float("0.0197122") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0592058") + max_val = float("0.0590503") + mean = float("-0.00223688") + std = float("0.0198972") + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0645318") + max_val = float("0.0622872") + mean = float("-0.000444717") + std = float("0.020224") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0533066") + max_val = float("0.066021") + mean = float("0.000328887") + std = float("0.0192417") + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.09613") + max_val = float("0.0914884") + mean = float("1.30753e-05") + std = float("0.0199968") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950543") + max_val = float("0.0963969") + mean = float("1.61388e-05") + std = float("0.0199462") + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0943406") + max_val = float("0.102827") + mean = float("2.36727e-05") + std = float("0.0200222") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.098155") + max_val = float("0.0884917") + mean = float("3.13806e-05") + std = float("0.0200088") + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0911829") + max_val = float("0.100784") + mean = float("2.40136e-05") + std = float("0.0199743") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.105951") + max_val = float("0.101723") + mean = float("-5.68485e-06") + std = float("0.0200186") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.098726") + max_val = float("0.0996512") + mean = float("-4.4365e-06") + std = float("0.0200118") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0718335") + max_val = float("0.065082") + mean = float("-5.04988e-05") + std = float("0.0197697") + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.056934") + max_val = float("0.0580286") + mean = float("9.49533e-05") + std = float("0.0193841") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0595877") + max_val = float("0.0660135") + mean = float("0.0004164") + std = float("0.0194652") + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0612096") + max_val = float("0.0827676") + mean = float("0.000251395") + std = float("0.0197037") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0846039") + max_val = float("0.0922142") + mean = float("-3.84193e-05") + std = float("0.019995") + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0926911") + max_val = float("0.0922433") + mean = float("-1.6358e-05") + std = float("0.0200194") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0978895") + max_val = float("0.0956747") + mean = float("-3.46703e-05") + std = float("0.020004") + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102104") + max_val = float("0.0910908") + mean = float("-5e-06") + std = float("0.0199864") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0892603") + max_val = float("0.100866") + mean = float("-5.00942e-05") + std = float("0.0200319") + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.106639") + max_val = float("0.108845") + mean = float("7.08545e-06") + std = float("0.0200057") + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.105093") + max_val = float("0.112508") + mean = float("-3.25768e-05") + std = float("0.0200009") + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0577629") + max_val = float("0.065642") + mean = float("0.000204452") + std = float("0.0199027") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.090207") + max_val = float("0.0611968") + mean = float("-0.00021308") + std = float("0.0197199") + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0570823") + max_val = float("0.0768646") + mean = float("0.000430867") + std = float("0.0196956") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0662148") + max_val = float("0.0569416") + mean = float("0.000616465") + std = float("0.0208327") + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102001") + max_val = float("0.096987") + mean = float("-2.67564e-05") + std = float("0.0199838") + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0906012") + max_val = float("0.0902717") + mean = float("2.2588e-05") + std = float("0.0199944") + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.091711") + max_val = float("0.111538") + mean = float("-1.27522e-05") + std = float("0.0199702") + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0936883") + max_val = float("0.0957427") + mean = float("-2.19867e-05") + std = float("0.0200179") + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0904621") + max_val = float("0.0907767") + mean = float("-4.65418e-05") + std = float("0.0200181") + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [32000, 768] + dtype = "float32" + min_val = float("-0.110721") + max_val = float("0.108419") + mean = float("3.69692e-06") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [1, 1, 768] + dtype = "float32" + min_val = float("-0.0660402") + max_val = float("0.0637117") + mean = float("-0.000634141") + std = float("0.0197734") + data = None diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-large/graph_net.json b/paddle_samples/PaddleNLP/chinese-xlnet-large/graph_net.json new file mode 100644 index 000000000..664b8cfff --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-large/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "chinese-xlnet-large", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-large/input_meta.py b/paddle_samples/PaddleNLP/chinese-xlnet-large/input_meta.py new file mode 100644 index 000000000..9ea1655e0 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-large/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 9] + dtype = "int64" + data = [19, 11684, 121, 15954, 2090, 21957, 1039, 4, 3] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 9] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 2] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 9] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1] diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-large/model.py b/paddle_samples/PaddleNLP/chinese-xlnet-large/model.py new file mode 100644 index 000000000..c01bfa31f --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-large/model.py @@ -0,0 +1,8389 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + parameter_305, + parameter_306, + parameter_307, + parameter_308, + parameter_309, + parameter_310, + parameter_311, + parameter_312, + parameter_313, + parameter_314, + parameter_315, + parameter_316, + parameter_317, + parameter_318, + parameter_319, + parameter_320, + parameter_321, + parameter_322, + parameter_323, + parameter_324, + parameter_325, + parameter_326, + parameter_327, + parameter_328, + parameter_329, + parameter_330, + parameter_331, + parameter_332, + parameter_333, + parameter_334, + parameter_335, + parameter_336, + parameter_337, + parameter_338, + parameter_339, + parameter_340, + parameter_341, + parameter_342, + parameter_343, + parameter_344, + parameter_345, + parameter_346, + parameter_347, + parameter_348, + parameter_349, + parameter_350, + parameter_351, + parameter_352, + parameter_353, + parameter_354, + parameter_355, + parameter_356, + parameter_357, + parameter_358, + parameter_359, + parameter_360, + parameter_361, + parameter_362, + parameter_363, + parameter_364, + parameter_365, + parameter_366, + parameter_367, + parameter_368, + parameter_369, + parameter_370, + parameter_371, + parameter_372, + parameter_373, + parameter_374, + parameter_375, + parameter_376, + parameter_377, + parameter_378, + parameter_379, + parameter_380, + parameter_381, + parameter_382, + parameter_383, + parameter_384, + parameter_385, + parameter_386, + parameter_387, + parameter_388, + parameter_389, + parameter_390, + parameter_391, + parameter_392, + parameter_393, + parameter_394, + parameter_395, + parameter_396, + parameter_397, + parameter_398, + parameter_399, + parameter_400, + parameter_401, + parameter_402, + parameter_403, + parameter_404, + parameter_405, + parameter_406, + parameter_407, + parameter_408, + parameter_409, + data_0, + data_1, + data_2, + ): + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_1 = paddle._C_ops.transpose(data_0, [1, 0]) + del data_0 + + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_2 = paddle._C_ops.transpose(data_1, [1, 0]) + del data_1 + + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_3 = paddle._C_ops.transpose(data_2, [1, 0]) + del data_2 + + # pd_op.cast: (9x1xf32) <- (9x1xi64) + cast_0 = paddle._C_ops.cast(transpose_3, paddle.float32) + del transpose_3 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (9x1xf32) <- (9x1xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.unsqueeze: (1x9x1xf32) <- (9x1xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.unsqueeze: (1x9x1x1xf32) <- (1x9x1xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.greater_than: (1x9x1x1xb) <- (1x9x1x1xf32, xf32) + greater_than_0 = paddle._C_ops.greater_than(unsqueeze_1, full_1) + del unsqueeze_1 + + # pd_op.cast: (1x9x1x1xf32) <- (1x9x1x1xb) + cast_1 = paddle._C_ops.cast(greater_than_0, paddle.float32) + del greater_than_0 + + # pd_op.full: (9xf32) <- () + full_2 = paddle._C_ops.full( + [9], float("1"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.diag: (9x9xf32) <- (9xf32) + diag_0 = paddle._C_ops.diag(full_2, 0, float("0")) + del full_2 + + # pd_op.scale: (9x9xf32) <- (9x9xf32, 1xf32) + scale_1 = paddle._C_ops.scale(diag_0, full_0, float("0"), True) + del diag_0, full_0 + + # pd_op.cast: (9x9xf32) <- (9x9xf32) + cast_2 = paddle._C_ops.cast(scale_1, paddle.float32) + del scale_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [2, 3] + + # pd_op.unsqueeze: (9x9x1x1xf32) <- (9x9xf32, 2xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(cast_2, full_int_array_2) + del cast_2, full_int_array_2 + + # pd_op.add: (9x9x1x1xf32) <- (1x9x1x1xf32, 9x9x1x1xf32) + add_0 = paddle._C_ops.add(cast_1, unsqueeze_2) + del cast_1, unsqueeze_2 + + # pd_op.greater_than: (9x9x1x1xb) <- (9x9x1x1xf32, xf32) + greater_than_1 = paddle._C_ops.greater_than(add_0, full_1) + del add_0, full_1 + + # pd_op.cast: (9x9x1x1xf32) <- (9x9x1x1xb) + cast_3 = paddle._C_ops.cast(greater_than_1, paddle.float32) + del greater_than_1 + + # pd_op.embedding: (9x1x1024xf32) <- (9x1xi64, 32000x1024xf32) + embedding_0 = paddle._C_ops.embedding(transpose_1, parameter_408, -1, False) + del parameter_408, transpose_1 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [1] + + # pd_op.unsqueeze: (9x1x1xi64) <- (9x1xi64, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_3) + + # pd_op.unsqueeze: (1x9x1xi64) <- (9x1xi64, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_0) + del full_int_array_0, transpose_2 + + # pd_op.not_equal: (9x9x1xb) <- (9x1x1xi64, 1x9x1xi64) + not_equal_0 = paddle._C_ops.not_equal(unsqueeze_3, unsqueeze_4) + del unsqueeze_3, unsqueeze_4 + + # pd_op.cast: (9x9x1xi64) <- (9x9x1xb) + cast_4 = paddle._C_ops.cast(not_equal_0, paddle.int64) + del not_equal_0 + + # pd_op.full: (1xi32) <- () + full_4 = paddle._C_ops.full( + [1], float("2"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.one_hot: (9x9x1x2xf32) <- (9x9x1xi64, 1xi32) + one_hot_0 = paddle._C_ops.one_hot( + cast_4 % paddle.cast(full_4, cast_4.dtype), full_4 + ) + del cast_4, full_4 + + # pd_op.cast: (9x9x1x2xf32) <- (9x9x1x2xf32) + cast_5 = paddle._C_ops.cast(one_hot_0, paddle.float32) + del one_hot_0 + + # pd_op.full: (1xf64) <- () + full_5 = paddle._C_ops.full( + [1], float("0"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_6 = paddle._C_ops.full( + [1], float("1024"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_7 = paddle._C_ops.full( + [1], float("2"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (512xf32) <- (1xf64, 1xf64, 1xf64) + arange_0 = paddle.arange(full_5, full_6, full_7, dtype="float32") + del full_6, full_7 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.000976562"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (512xf32) <- (512xf32, 1xf32) + scale_2 = paddle._C_ops.scale(arange_0, full_8, float("0"), True) + del arange_0, full_8 + + # pd_op.full: (512xf32) <- () + full_9 = paddle._C_ops.full( + [512], + float("10000"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.elementwise_pow: (512xf32) <- (512xf32, 512xf32) + elementwise_pow_0 = paddle._C_ops.elementwise_pow(full_9, scale_2) + del full_9, scale_2 + + # pd_op.full: (512xf32) <- () + full_10 = paddle._C_ops.full( + [512], + float("1"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.divide: (512xf32) <- (512xf32, 512xf32) + divide_0 = paddle._C_ops.divide(full_10, elementwise_pow_0) + del elementwise_pow_0, full_10 + + # pd_op.full: (1xf64) <- () + full_11 = paddle._C_ops.full( + [1], float("9"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_12 = paddle._C_ops.full( + [1], float("-9"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_13 = paddle._C_ops.full( + [1], float("-1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (18xf32) <- (1xf64, 1xf64, 1xf64) + arange_1 = paddle.arange(full_11, full_12, full_13, dtype="float32") + del full_12, full_13 + + # builtin.combine: ([18xf32, 512xf32]) <- (18xf32, 512xf32) + combine_0 = [arange_1, divide_0] + del arange_1, divide_0 + + # pd_op.einsum: (18x512xf32, [0xf32, 0xf32], [18xf32, 512xf32]) <- ([18xf32, 512xf32]) + einsum_0, einsum_1, einsum_2 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_0, "i,d->id"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_0 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_0, + split_1, + ) = einsum_1 + del einsum_1 + + # builtin.split: (18xf32, 512xf32) <- ([18xf32, 512xf32]) + ( + split_2, + split_3, + ) = einsum_2 + del einsum_2 + + # pd_op.sin: (18x512xf32) <- (18x512xf32) + sin_0 = paddle._C_ops.sin(einsum_0) + + # pd_op.cos: (18x512xf32) <- (18x512xf32) + cos_0 = paddle._C_ops.cos(einsum_0) + del einsum_0 + + # pd_op.full: (1xi32) <- () + full_14 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([18x512xf32, 18x512xf32]) <- (18x512xf32, 18x512xf32) + combine_1 = [sin_0, cos_0] + del cos_0, sin_0 + + # pd_op.concat: (18x1024xf32) <- ([18x512xf32, 18x512xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_1, full_14) + del combine_1, full_14 + + # pd_op.unsqueeze: (18x1x1024xf32) <- (18x1024xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(concat_0, full_int_array_3) + del concat_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 1, -1] + + # pd_op.expand: (18x1x1024xf32) <- (18x1x1024xf32, 3xi64) + expand_0 = paddle._C_ops.expand(unsqueeze_5, full_int_array_4) + del full_int_array_4, unsqueeze_5 + + # pd_op.dropout: (18x1x1024xf32, 18x1x1024xui8) <- (18x1x1024xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + expand_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del expand_0 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_407, False, False) + del parameter_407 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_5 = [9, 1, 16, 64] + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_0, full_int_array_5) + del matmul_0 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_406, False, False) + del parameter_406 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_1, full_int_array_5) + del matmul_1 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_405, False, False) + del parameter_405 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_2, full_int_array_5) + del matmul_2 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_3 = paddle._C_ops.matmul(dropout_2, parameter_403, False, False) + del parameter_403 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_6 = [18, -1, 16, 64] + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(matmul_3, full_int_array_6) + del matmul_3 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_1 = paddle._C_ops.add(reshape_0, parameter_400) + del parameter_400 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_2 = [add_1, reshape_1] + del add_1, reshape_1 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_3, einsum_4, einsum_5 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_2, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_2 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_4, + split_5, + ) = einsum_4 + del einsum_4 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_6, + split_7, + ) = einsum_5 + del einsum_5 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_2 = paddle._C_ops.add(reshape_0, parameter_402) + del parameter_402 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_3 = [add_2, reshape_3] + del add_2, reshape_3 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_6, einsum_7, einsum_8 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_3, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_3 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_8, + split_9, + ) = einsum_7 + del einsum_7 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_10, + split_11, + ) = einsum_8 + del einsum_8 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_7 = [1, 16, 18, 9] + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(einsum_6, full_int_array_7) + del einsum_6 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [2147483647] + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + reshape_4, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_4 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, 16, 9, 17] + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(slice_0, full_int_array_9) + del slice_0 + + # pd_op.full: (1xf64) <- () + full_15 = paddle._C_ops.full( + [1], float("1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (9xi64) <- (1xf64, 1xf64, 1xf64) + arange_2 = paddle.arange(full_5, full_11, full_15, dtype="int64") + del full_11, full_15, full_5 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_0 = paddle._C_ops.index_select(reshape_5, arange_2, 3) + del reshape_5 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_3 = paddle._C_ops.add(reshape_0, parameter_401) + del parameter_401, reshape_0 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_4 = [add_3, parameter_399] + del add_3, parameter_399 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_9, einsum_10, einsum_11 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_4, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_4 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_12, + split_13, + ) = einsum_10 + del einsum_10 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_14, + split_15, + ) = einsum_11 + del einsum_11 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_5 = [cast_5, einsum_9] + del einsum_9 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_12, einsum_13, einsum_14 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_5, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_5 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_16, + split_17, + ) = einsum_13 + del einsum_13 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_18, + split_19, + ) = einsum_14 + del einsum_14 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_4 = paddle._C_ops.add(einsum_3, index_select_0) + del einsum_3, index_select_0 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_5 = paddle._C_ops.add(add_4, einsum_12) + del add_4, einsum_12 + + # pd_op.full: (1xf32) <- () + full_16 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_3 = paddle._C_ops.scale(add_5, full_16, float("0"), True) + del add_5 + + # pd_op.transpose: (1x1x9x9xf32) <- (9x9x1x1xf32) + transpose_4 = paddle._C_ops.transpose(cast_3, [2, 3, 0, 1]) + del cast_3 + + # pd_op.full: (1xf32) <- () + full_17 = paddle._C_ops.full( + [1], float("1e+30"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x9x9xf32) <- (1x1x9x9xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_4, full_17, float("0"), True) + del full_17, transpose_4 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_0 = paddle._C_ops.subtract(scale_3, scale_4) + del scale_3 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_0 = paddle._C_ops.softmax(subtract_0, 3) + del subtract_0 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_6 = [dropout_4, reshape_2] + del dropout_4, reshape_2 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_15, einsum_16, einsum_17 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_6, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_6 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_20, + split_21, + ) = einsum_16 + del einsum_16 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_22, + split_23, + ) = einsum_17 + del einsum_17 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_10 = [9, 1, 1024] + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_6 = paddle._C_ops.reshape(einsum_15, full_int_array_10) + del einsum_15 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_7 = [reshape_6, parameter_404] + del parameter_404, reshape_6 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_18, einsum_19, einsum_20 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_7, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_7 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_24, + split_25, + ) = einsum_19 + del einsum_19 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_26, + split_27, + ) = einsum_20 + del einsum_20 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_18 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_6 = paddle._C_ops.add(dropout_6, dropout_0) + del dropout_0, dropout_6 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_398, parameter_397, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_6, parameter_397, parameter_398 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_0, parameter_394, False, False) + del parameter_394 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_7 = paddle._C_ops.add(matmul_4, parameter_393) + del matmul_4, parameter_393 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_0 = paddle._C_ops.relu(add_7) + del add_7 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_0 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_5 = paddle._C_ops.matmul(dropout_8, parameter_392, False, False) + del dropout_8, parameter_392 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_8 = paddle._C_ops.add(matmul_5, parameter_391) + del matmul_5, parameter_391 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_8 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_9 = paddle._C_ops.add(dropout_10, layer_norm_0) + del dropout_10, layer_norm_0 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_9, parameter_396, parameter_395, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_9, parameter_395, parameter_396 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_3, parameter_390, False, False) + del parameter_390 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(matmul_6, full_int_array_5) + del matmul_6 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_389, False, False) + del parameter_389 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_7, full_int_array_5) + del matmul_7 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_3, parameter_388, False, False) + del parameter_388 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_8, full_int_array_5) + del matmul_8 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_9 = paddle._C_ops.matmul(dropout_2, parameter_386, False, False) + del parameter_386 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_9, full_int_array_6) + del matmul_9 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_10 = paddle._C_ops.add(reshape_7, parameter_383) + del parameter_383 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_8 = [add_10, reshape_8] + del add_10, reshape_8 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_21, einsum_22, einsum_23 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_8, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_8 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_28, + split_29, + ) = einsum_22 + del einsum_22 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_30, + split_31, + ) = einsum_23 + del einsum_23 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_11 = paddle._C_ops.add(reshape_7, parameter_385) + del parameter_385 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_9 = [add_11, reshape_10] + del add_11, reshape_10 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_24, einsum_25, einsum_26 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_9, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_9 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_32, + split_33, + ) = einsum_25 + del einsum_25 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_34, + split_35, + ) = einsum_26 + del einsum_26 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(einsum_24, full_int_array_7) + del einsum_24 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + reshape_11, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_11 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(slice_1, full_int_array_9) + del slice_1 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_1 = paddle._C_ops.index_select(reshape_12, arange_2, 3) + del reshape_12 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_12 = paddle._C_ops.add(reshape_7, parameter_384) + del parameter_384, reshape_7 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_10 = [add_12, parameter_382] + del add_12, parameter_382 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_27, einsum_28, einsum_29 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_10, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_10 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_36, + split_37, + ) = einsum_28 + del einsum_28 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_38, + split_39, + ) = einsum_29 + del einsum_29 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_11 = [cast_5, einsum_27] + del einsum_27 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_30, einsum_31, einsum_32 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_11, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_11 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_40, + split_41, + ) = einsum_31 + del einsum_31 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_42, + split_43, + ) = einsum_32 + del einsum_32 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_13 = paddle._C_ops.add(einsum_21, index_select_1) + del einsum_21, index_select_1 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_14 = paddle._C_ops.add(add_13, einsum_30) + del add_13, einsum_30 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_5 = paddle._C_ops.scale(add_14, full_16, float("0"), True) + del add_14 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_1 = paddle._C_ops.subtract(scale_5, scale_4) + del scale_5 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_1 = paddle._C_ops.softmax(subtract_1, 3) + del subtract_1 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_12 = [dropout_12, reshape_9] + del dropout_12, reshape_9 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_33, einsum_34, einsum_35 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_12, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_12 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_44, + split_45, + ) = einsum_34 + del einsum_34 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_46, + split_47, + ) = einsum_35 + del einsum_35 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_13 = paddle._C_ops.reshape(einsum_33, full_int_array_10) + del einsum_33 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_13 = [reshape_13, parameter_387] + del parameter_387, reshape_13 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_36, einsum_37, einsum_38 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_13, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_13 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_48, + split_49, + ) = einsum_37 + del einsum_37 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_50, + split_51, + ) = einsum_38 + del einsum_38 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_36, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_36 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_15 = paddle._C_ops.add(dropout_14, layer_norm_3) + del dropout_14, layer_norm_3 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_15, parameter_381, parameter_380, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_15, parameter_380, parameter_381 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_377, False, False) + del parameter_377 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_16 = paddle._C_ops.add(matmul_10, parameter_376) + del matmul_10, parameter_376 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_1 = paddle._C_ops.relu(add_16) + del add_16 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_1 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_11 = paddle._C_ops.matmul(dropout_16, parameter_375, False, False) + del dropout_16, parameter_375 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_17 = paddle._C_ops.add(matmul_11, parameter_374) + del matmul_11, parameter_374 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_17 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_18 = paddle._C_ops.add(dropout_18, layer_norm_6) + del dropout_18, layer_norm_6 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_379, parameter_378, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18, parameter_378, parameter_379 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_9, parameter_373, False, False) + del parameter_373 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_12, full_int_array_5) + del matmul_12 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_13 = paddle._C_ops.matmul(layer_norm_9, parameter_372, False, False) + del parameter_372 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(matmul_13, full_int_array_5) + del matmul_13 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_9, parameter_371, False, False) + del parameter_371 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_14, full_int_array_5) + del matmul_14 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_15 = paddle._C_ops.matmul(dropout_2, parameter_369, False, False) + del parameter_369 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_15, full_int_array_6) + del matmul_15 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_19 = paddle._C_ops.add(reshape_14, parameter_366) + del parameter_366 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_14 = [add_19, reshape_15] + del add_19, reshape_15 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_39, einsum_40, einsum_41 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_14, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_14 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_52, + split_53, + ) = einsum_40 + del einsum_40 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_54, + split_55, + ) = einsum_41 + del einsum_41 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_20 = paddle._C_ops.add(reshape_14, parameter_368) + del parameter_368 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_15 = [add_20, reshape_17] + del add_20, reshape_17 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_42, einsum_43, einsum_44 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_15, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_15 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_56, + split_57, + ) = einsum_43 + del einsum_43 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_58, + split_59, + ) = einsum_44 + del einsum_44 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(einsum_42, full_int_array_7) + del einsum_42 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + reshape_18, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_18 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_19 = paddle._C_ops.reshape(slice_2, full_int_array_9) + del slice_2 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_2 = paddle._C_ops.index_select(reshape_19, arange_2, 3) + del reshape_19 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_21 = paddle._C_ops.add(reshape_14, parameter_367) + del parameter_367, reshape_14 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_16 = [add_21, parameter_365] + del add_21, parameter_365 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_45, einsum_46, einsum_47 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_16, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_16 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_60, + split_61, + ) = einsum_46 + del einsum_46 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_62, + split_63, + ) = einsum_47 + del einsum_47 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_17 = [cast_5, einsum_45] + del einsum_45 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_48, einsum_49, einsum_50 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_17, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_17 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_64, + split_65, + ) = einsum_49 + del einsum_49 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_66, + split_67, + ) = einsum_50 + del einsum_50 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_22 = paddle._C_ops.add(einsum_39, index_select_2) + del einsum_39, index_select_2 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_23 = paddle._C_ops.add(add_22, einsum_48) + del add_22, einsum_48 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_6 = paddle._C_ops.scale(add_23, full_16, float("0"), True) + del add_23 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_2 = paddle._C_ops.subtract(scale_6, scale_4) + del scale_6 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_2 = paddle._C_ops.softmax(subtract_2, 3) + del subtract_2 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_18 = [dropout_20, reshape_16] + del dropout_20, reshape_16 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_51, einsum_52, einsum_53 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_18, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_18 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_68, + split_69, + ) = einsum_52 + del einsum_52 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_70, + split_71, + ) = einsum_53 + del einsum_53 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_20 = paddle._C_ops.reshape(einsum_51, full_int_array_10) + del einsum_51 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_19 = [reshape_20, parameter_370] + del parameter_370, reshape_20 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_54, einsum_55, einsum_56 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_19, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_19 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_72, + split_73, + ) = einsum_55 + del einsum_55 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_74, + split_75, + ) = einsum_56 + del einsum_56 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_54, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_54 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_24 = paddle._C_ops.add(dropout_22, layer_norm_9) + del dropout_22, layer_norm_9 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_24, parameter_364, parameter_363, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_24, parameter_363, parameter_364 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_12, parameter_360, False, False) + del parameter_360 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_25 = paddle._C_ops.add(matmul_16, parameter_359) + del matmul_16, parameter_359 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_2 = paddle._C_ops.relu(add_25) + del add_25 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_2 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_17 = paddle._C_ops.matmul(dropout_24, parameter_358, False, False) + del dropout_24, parameter_358 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_26 = paddle._C_ops.add(matmul_17, parameter_357) + del matmul_17, parameter_357 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_26, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_26 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_27 = paddle._C_ops.add(dropout_26, layer_norm_12) + del dropout_26, layer_norm_12 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_27, parameter_362, parameter_361, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_27, parameter_361, parameter_362 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_15, parameter_356, False, False) + del parameter_356 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_18, full_int_array_5) + del matmul_18 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_15, parameter_355, False, False) + del parameter_355 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_19, full_int_array_5) + del matmul_19 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_20 = paddle._C_ops.matmul(layer_norm_15, parameter_354, False, False) + del parameter_354 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_23 = paddle._C_ops.reshape(matmul_20, full_int_array_5) + del matmul_20 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_21 = paddle._C_ops.matmul(dropout_2, parameter_352, False, False) + del parameter_352 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_21, full_int_array_6) + del matmul_21 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_28 = paddle._C_ops.add(reshape_21, parameter_349) + del parameter_349 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_20 = [add_28, reshape_22] + del add_28, reshape_22 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_57, einsum_58, einsum_59 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_20, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_20 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_76, + split_77, + ) = einsum_58 + del einsum_58 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_78, + split_79, + ) = einsum_59 + del einsum_59 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_29 = paddle._C_ops.add(reshape_21, parameter_351) + del parameter_351 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_21 = [add_29, reshape_24] + del add_29, reshape_24 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_60, einsum_61, einsum_62 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_21, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_21 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_80, + split_81, + ) = einsum_61 + del einsum_61 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_82, + split_83, + ) = einsum_62 + del einsum_62 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(einsum_60, full_int_array_7) + del einsum_60 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + reshape_25, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_25 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(slice_3, full_int_array_9) + del slice_3 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_3 = paddle._C_ops.index_select(reshape_26, arange_2, 3) + del reshape_26 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_30 = paddle._C_ops.add(reshape_21, parameter_350) + del parameter_350, reshape_21 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_22 = [add_30, parameter_348] + del add_30, parameter_348 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_63, einsum_64, einsum_65 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_22, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_22 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_84, + split_85, + ) = einsum_64 + del einsum_64 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_86, + split_87, + ) = einsum_65 + del einsum_65 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_23 = [cast_5, einsum_63] + del einsum_63 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_66, einsum_67, einsum_68 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_23, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_23 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_88, + split_89, + ) = einsum_67 + del einsum_67 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_90, + split_91, + ) = einsum_68 + del einsum_68 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_31 = paddle._C_ops.add(einsum_57, index_select_3) + del einsum_57, index_select_3 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_32 = paddle._C_ops.add(add_31, einsum_66) + del add_31, einsum_66 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_7 = paddle._C_ops.scale(add_32, full_16, float("0"), True) + del add_32 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_3 = paddle._C_ops.subtract(scale_7, scale_4) + del scale_7 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_3 = paddle._C_ops.softmax(subtract_3, 3) + del subtract_3 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_24 = [dropout_28, reshape_23] + del dropout_28, reshape_23 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_69, einsum_70, einsum_71 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_24, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_24 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_92, + split_93, + ) = einsum_70 + del einsum_70 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_94, + split_95, + ) = einsum_71 + del einsum_71 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(einsum_69, full_int_array_10) + del einsum_69 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_25 = [reshape_27, parameter_353] + del parameter_353, reshape_27 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_72, einsum_73, einsum_74 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_25, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_25 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_96, + split_97, + ) = einsum_73 + del einsum_73 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_98, + split_99, + ) = einsum_74 + del einsum_74 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_72, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_72 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_33 = paddle._C_ops.add(dropout_30, layer_norm_15) + del dropout_30, layer_norm_15 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_33, parameter_347, parameter_346, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_33, parameter_346, parameter_347 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_18, parameter_343, False, False) + del parameter_343 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_34 = paddle._C_ops.add(matmul_22, parameter_342) + del matmul_22, parameter_342 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_3 = paddle._C_ops.relu(add_34) + del add_34 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_3 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_23 = paddle._C_ops.matmul(dropout_32, parameter_341, False, False) + del dropout_32, parameter_341 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_35 = paddle._C_ops.add(matmul_23, parameter_340) + del matmul_23, parameter_340 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_35, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_35 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_36 = paddle._C_ops.add(dropout_34, layer_norm_18) + del dropout_34, layer_norm_18 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_36, parameter_345, parameter_344, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_36, parameter_344, parameter_345 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_21, parameter_339, False, False) + del parameter_339 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_24, full_int_array_5) + del matmul_24 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_21, parameter_338, False, False) + del parameter_338 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_25, full_int_array_5) + del matmul_25 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_21, parameter_337, False, False) + del parameter_337 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_26, full_int_array_5) + del matmul_26 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_27 = paddle._C_ops.matmul(dropout_2, parameter_335, False, False) + del parameter_335 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(matmul_27, full_int_array_6) + del matmul_27 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_37 = paddle._C_ops.add(reshape_28, parameter_332) + del parameter_332 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_26 = [add_37, reshape_29] + del add_37, reshape_29 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_75, einsum_76, einsum_77 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_26, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_26 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_100, + split_101, + ) = einsum_76 + del einsum_76 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_102, + split_103, + ) = einsum_77 + del einsum_77 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_38 = paddle._C_ops.add(reshape_28, parameter_334) + del parameter_334 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_27 = [add_38, reshape_31] + del add_38, reshape_31 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_78, einsum_79, einsum_80 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_27, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_27 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_104, + split_105, + ) = einsum_79 + del einsum_79 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_106, + split_107, + ) = einsum_80 + del einsum_80 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(einsum_78, full_int_array_7) + del einsum_78 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + reshape_32, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_32 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(slice_4, full_int_array_9) + del slice_4 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_4 = paddle._C_ops.index_select(reshape_33, arange_2, 3) + del reshape_33 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_39 = paddle._C_ops.add(reshape_28, parameter_333) + del parameter_333, reshape_28 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_28 = [add_39, parameter_331] + del add_39, parameter_331 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_81, einsum_82, einsum_83 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_28, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_28 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_108, + split_109, + ) = einsum_82 + del einsum_82 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_110, + split_111, + ) = einsum_83 + del einsum_83 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_29 = [cast_5, einsum_81] + del einsum_81 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_84, einsum_85, einsum_86 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_29, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_29 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_112, + split_113, + ) = einsum_85 + del einsum_85 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_114, + split_115, + ) = einsum_86 + del einsum_86 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_40 = paddle._C_ops.add(einsum_75, index_select_4) + del einsum_75, index_select_4 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_41 = paddle._C_ops.add(add_40, einsum_84) + del add_40, einsum_84 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_8 = paddle._C_ops.scale(add_41, full_16, float("0"), True) + del add_41 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_4 = paddle._C_ops.subtract(scale_8, scale_4) + del scale_8 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_4 = paddle._C_ops.softmax(subtract_4, 3) + del subtract_4 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_30 = [dropout_36, reshape_30] + del dropout_36, reshape_30 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_87, einsum_88, einsum_89 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_30, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_30 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_116, + split_117, + ) = einsum_88 + del einsum_88 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_118, + split_119, + ) = einsum_89 + del einsum_89 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_34 = paddle._C_ops.reshape(einsum_87, full_int_array_10) + del einsum_87 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_31 = [reshape_34, parameter_336] + del parameter_336, reshape_34 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_90, einsum_91, einsum_92 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_31, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_31 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_120, + split_121, + ) = einsum_91 + del einsum_91 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_122, + split_123, + ) = einsum_92 + del einsum_92 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_90, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_90 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_42 = paddle._C_ops.add(dropout_38, layer_norm_21) + del dropout_38, layer_norm_21 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_330, parameter_329, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_42, parameter_329, parameter_330 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_28 = paddle._C_ops.matmul(layer_norm_24, parameter_326, False, False) + del parameter_326 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_43 = paddle._C_ops.add(matmul_28, parameter_325) + del matmul_28, parameter_325 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_4 = paddle._C_ops.relu(add_43) + del add_43 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_4 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_29 = paddle._C_ops.matmul(dropout_40, parameter_324, False, False) + del dropout_40, parameter_324 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_44 = paddle._C_ops.add(matmul_29, parameter_323) + del matmul_29, parameter_323 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_44, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_44 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_45 = paddle._C_ops.add(dropout_42, layer_norm_24) + del dropout_42, layer_norm_24 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_328, parameter_327, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_45, parameter_327, parameter_328 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_27, parameter_322, False, False) + del parameter_322 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(matmul_30, full_int_array_5) + del matmul_30 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_27, parameter_321, False, False) + del parameter_321 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_31, full_int_array_5) + del matmul_31 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_27, parameter_320, False, False) + del parameter_320 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_32, full_int_array_5) + del matmul_32 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_33 = paddle._C_ops.matmul(dropout_2, parameter_318, False, False) + del parameter_318 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_33, full_int_array_6) + del matmul_33 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_46 = paddle._C_ops.add(reshape_35, parameter_315) + del parameter_315 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_32 = [add_46, reshape_36] + del add_46, reshape_36 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_93, einsum_94, einsum_95 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_32, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_32 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_124, + split_125, + ) = einsum_94 + del einsum_94 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_126, + split_127, + ) = einsum_95 + del einsum_95 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_47 = paddle._C_ops.add(reshape_35, parameter_317) + del parameter_317 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_33 = [add_47, reshape_38] + del add_47, reshape_38 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_96, einsum_97, einsum_98 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_33, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_33 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_128, + split_129, + ) = einsum_97 + del einsum_97 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_130, + split_131, + ) = einsum_98 + del einsum_98 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(einsum_96, full_int_array_7) + del einsum_96 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + reshape_39, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_39 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(slice_5, full_int_array_9) + del slice_5 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_5 = paddle._C_ops.index_select(reshape_40, arange_2, 3) + del reshape_40 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_48 = paddle._C_ops.add(reshape_35, parameter_316) + del parameter_316, reshape_35 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_34 = [add_48, parameter_314] + del add_48, parameter_314 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_99, einsum_100, einsum_101 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_34, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_34 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_132, + split_133, + ) = einsum_100 + del einsum_100 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_134, + split_135, + ) = einsum_101 + del einsum_101 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_35 = [cast_5, einsum_99] + del einsum_99 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_102, einsum_103, einsum_104 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_35, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_35 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_136, + split_137, + ) = einsum_103 + del einsum_103 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_138, + split_139, + ) = einsum_104 + del einsum_104 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_49 = paddle._C_ops.add(einsum_93, index_select_5) + del einsum_93, index_select_5 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_50 = paddle._C_ops.add(add_49, einsum_102) + del add_49, einsum_102 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_9 = paddle._C_ops.scale(add_50, full_16, float("0"), True) + del add_50 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_5 = paddle._C_ops.subtract(scale_9, scale_4) + del scale_9 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_5 = paddle._C_ops.softmax(subtract_5, 3) + del subtract_5 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_36 = [dropout_44, reshape_37] + del dropout_44, reshape_37 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_105, einsum_106, einsum_107 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_36, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_36 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_140, + split_141, + ) = einsum_106 + del einsum_106 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_142, + split_143, + ) = einsum_107 + del einsum_107 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(einsum_105, full_int_array_10) + del einsum_105 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_37 = [reshape_41, parameter_319] + del parameter_319, reshape_41 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_108, einsum_109, einsum_110 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_37, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_37 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_144, + split_145, + ) = einsum_109 + del einsum_109 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_146, + split_147, + ) = einsum_110 + del einsum_110 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_108, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_108 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_51 = paddle._C_ops.add(dropout_46, layer_norm_27) + del dropout_46, layer_norm_27 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_51, parameter_313, parameter_312, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_51, parameter_312, parameter_313 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_30, parameter_309, False, False) + del parameter_309 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_52 = paddle._C_ops.add(matmul_34, parameter_308) + del matmul_34, parameter_308 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_5 = paddle._C_ops.relu(add_52) + del add_52 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_5 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_35 = paddle._C_ops.matmul(dropout_48, parameter_307, False, False) + del dropout_48, parameter_307 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_53 = paddle._C_ops.add(matmul_35, parameter_306) + del matmul_35, parameter_306 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_53, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_53 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_54 = paddle._C_ops.add(dropout_50, layer_norm_30) + del dropout_50, layer_norm_30 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_54, parameter_311, parameter_310, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_54, parameter_310, parameter_311 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_33, parameter_305, False, False) + del parameter_305 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_36, full_int_array_5) + del matmul_36 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_37 = paddle._C_ops.matmul(layer_norm_33, parameter_304, False, False) + del parameter_304 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(matmul_37, full_int_array_5) + del matmul_37 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_33, parameter_303, False, False) + del parameter_303 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_38, full_int_array_5) + del matmul_38 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_39 = paddle._C_ops.matmul(dropout_2, parameter_301, False, False) + del parameter_301 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_39, full_int_array_6) + del matmul_39 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_55 = paddle._C_ops.add(reshape_42, parameter_298) + del parameter_298 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_38 = [add_55, reshape_43] + del add_55, reshape_43 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_111, einsum_112, einsum_113 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_38, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_38 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_148, + split_149, + ) = einsum_112 + del einsum_112 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_150, + split_151, + ) = einsum_113 + del einsum_113 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_56 = paddle._C_ops.add(reshape_42, parameter_300) + del parameter_300 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_39 = [add_56, reshape_45] + del add_56, reshape_45 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_114, einsum_115, einsum_116 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_39, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_39 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_152, + split_153, + ) = einsum_115 + del einsum_115 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_154, + split_155, + ) = einsum_116 + del einsum_116 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(einsum_114, full_int_array_7) + del einsum_114 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + reshape_46, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_46 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(slice_6, full_int_array_9) + del slice_6 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_6 = paddle._C_ops.index_select(reshape_47, arange_2, 3) + del reshape_47 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_57 = paddle._C_ops.add(reshape_42, parameter_299) + del parameter_299, reshape_42 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_40 = [add_57, parameter_297] + del add_57, parameter_297 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_117, einsum_118, einsum_119 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_40, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_40 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_156, + split_157, + ) = einsum_118 + del einsum_118 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_158, + split_159, + ) = einsum_119 + del einsum_119 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_41 = [cast_5, einsum_117] + del einsum_117 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_120, einsum_121, einsum_122 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_41, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_41 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_160, + split_161, + ) = einsum_121 + del einsum_121 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_162, + split_163, + ) = einsum_122 + del einsum_122 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_58 = paddle._C_ops.add(einsum_111, index_select_6) + del einsum_111, index_select_6 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_59 = paddle._C_ops.add(add_58, einsum_120) + del add_58, einsum_120 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_10 = paddle._C_ops.scale(add_59, full_16, float("0"), True) + del add_59 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_6 = paddle._C_ops.subtract(scale_10, scale_4) + del scale_10 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_6 = paddle._C_ops.softmax(subtract_6, 3) + del subtract_6 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_42 = [dropout_52, reshape_44] + del dropout_52, reshape_44 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_123, einsum_124, einsum_125 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_42, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_42 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_164, + split_165, + ) = einsum_124 + del einsum_124 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_166, + split_167, + ) = einsum_125 + del einsum_125 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(einsum_123, full_int_array_10) + del einsum_123 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_43 = [reshape_48, parameter_302] + del parameter_302, reshape_48 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_126, einsum_127, einsum_128 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_43, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_43 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_168, + split_169, + ) = einsum_127 + del einsum_127 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_170, + split_171, + ) = einsum_128 + del einsum_128 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_126, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_126 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_60 = paddle._C_ops.add(dropout_54, layer_norm_33) + del dropout_54, layer_norm_33 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_60, parameter_296, parameter_295, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_60, parameter_295, parameter_296 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_36, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_61 = paddle._C_ops.add(matmul_40, parameter_291) + del matmul_40, parameter_291 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_6 = paddle._C_ops.relu(add_61) + del add_61 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_6 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_41 = paddle._C_ops.matmul(dropout_56, parameter_290, False, False) + del dropout_56, parameter_290 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_62 = paddle._C_ops.add(matmul_41, parameter_289) + del matmul_41, parameter_289 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_62, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_62 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_63 = paddle._C_ops.add(dropout_58, layer_norm_36) + del dropout_58, layer_norm_36 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_63, parameter_294, parameter_293, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_63, parameter_293, parameter_294 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_39, parameter_288, False, False) + del parameter_288 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(matmul_42, full_int_array_5) + del matmul_42 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_39, parameter_287, False, False) + del parameter_287 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(matmul_43, full_int_array_5) + del matmul_43 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_44 = paddle._C_ops.matmul(layer_norm_39, parameter_286, False, False) + del parameter_286 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_51 = paddle._C_ops.reshape(matmul_44, full_int_array_5) + del matmul_44 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_45 = paddle._C_ops.matmul(dropout_2, parameter_284, False, False) + del parameter_284 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(matmul_45, full_int_array_6) + del matmul_45 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_64 = paddle._C_ops.add(reshape_49, parameter_281) + del parameter_281 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_44 = [add_64, reshape_50] + del add_64, reshape_50 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_129, einsum_130, einsum_131 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_44, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_44 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_172, + split_173, + ) = einsum_130 + del einsum_130 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_174, + split_175, + ) = einsum_131 + del einsum_131 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_65 = paddle._C_ops.add(reshape_49, parameter_283) + del parameter_283 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_45 = [add_65, reshape_52] + del add_65, reshape_52 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_132, einsum_133, einsum_134 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_45, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_45 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_176, + split_177, + ) = einsum_133 + del einsum_133 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_178, + split_179, + ) = einsum_134 + del einsum_134 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(einsum_132, full_int_array_7) + del einsum_132 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + reshape_53, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_53 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(slice_7, full_int_array_9) + del slice_7 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_7 = paddle._C_ops.index_select(reshape_54, arange_2, 3) + del reshape_54 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_66 = paddle._C_ops.add(reshape_49, parameter_282) + del parameter_282, reshape_49 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_46 = [add_66, parameter_280] + del add_66, parameter_280 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_135, einsum_136, einsum_137 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_46, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_46 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_180, + split_181, + ) = einsum_136 + del einsum_136 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_182, + split_183, + ) = einsum_137 + del einsum_137 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_47 = [cast_5, einsum_135] + del einsum_135 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_138, einsum_139, einsum_140 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_47, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_47 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_184, + split_185, + ) = einsum_139 + del einsum_139 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_186, + split_187, + ) = einsum_140 + del einsum_140 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_67 = paddle._C_ops.add(einsum_129, index_select_7) + del einsum_129, index_select_7 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_68 = paddle._C_ops.add(add_67, einsum_138) + del add_67, einsum_138 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_11 = paddle._C_ops.scale(add_68, full_16, float("0"), True) + del add_68 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_7 = paddle._C_ops.subtract(scale_11, scale_4) + del scale_11 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_7 = paddle._C_ops.softmax(subtract_7, 3) + del subtract_7 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_48 = [dropout_60, reshape_51] + del dropout_60, reshape_51 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_141, einsum_142, einsum_143 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_48, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_48 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_188, + split_189, + ) = einsum_142 + del einsum_142 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_190, + split_191, + ) = einsum_143 + del einsum_143 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(einsum_141, full_int_array_10) + del einsum_141 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_49 = [reshape_55, parameter_285] + del parameter_285, reshape_55 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_144, einsum_145, einsum_146 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_49, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_49 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_192, + split_193, + ) = einsum_145 + del einsum_145 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_194, + split_195, + ) = einsum_146 + del einsum_146 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_144, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_144 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_69 = paddle._C_ops.add(dropout_62, layer_norm_39) + del dropout_62, layer_norm_39 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_69, parameter_279, parameter_278, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_69, parameter_278, parameter_279 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_42, parameter_275, False, False) + del parameter_275 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_70 = paddle._C_ops.add(matmul_46, parameter_274) + del matmul_46, parameter_274 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_7 = paddle._C_ops.relu(add_70) + del add_70 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_7 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_47 = paddle._C_ops.matmul(dropout_64, parameter_273, False, False) + del dropout_64, parameter_273 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_71 = paddle._C_ops.add(matmul_47, parameter_272) + del matmul_47, parameter_272 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_71, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_71 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_72 = paddle._C_ops.add(dropout_66, layer_norm_42) + del dropout_66, layer_norm_42 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_72, parameter_277, parameter_276, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_72, parameter_276, parameter_277 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_45, parameter_271, False, False) + del parameter_271 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(matmul_48, full_int_array_5) + del matmul_48 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_45, parameter_270, False, False) + del parameter_270 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(matmul_49, full_int_array_5) + del matmul_49 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_45, parameter_269, False, False) + del parameter_269 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(matmul_50, full_int_array_5) + del matmul_50 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_51 = paddle._C_ops.matmul(dropout_2, parameter_267, False, False) + del parameter_267 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_59 = paddle._C_ops.reshape(matmul_51, full_int_array_6) + del matmul_51 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_73 = paddle._C_ops.add(reshape_56, parameter_264) + del parameter_264 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_50 = [add_73, reshape_57] + del add_73, reshape_57 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_147, einsum_148, einsum_149 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_50, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_50 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_196, + split_197, + ) = einsum_148 + del einsum_148 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_198, + split_199, + ) = einsum_149 + del einsum_149 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_74 = paddle._C_ops.add(reshape_56, parameter_266) + del parameter_266 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_51 = [add_74, reshape_59] + del add_74, reshape_59 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_150, einsum_151, einsum_152 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_51, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_51 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_200, + split_201, + ) = einsum_151 + del einsum_151 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_202, + split_203, + ) = einsum_152 + del einsum_152 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(einsum_150, full_int_array_7) + del einsum_150 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + reshape_60, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_60 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(slice_8, full_int_array_9) + del slice_8 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_8 = paddle._C_ops.index_select(reshape_61, arange_2, 3) + del reshape_61 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_75 = paddle._C_ops.add(reshape_56, parameter_265) + del parameter_265, reshape_56 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_52 = [add_75, parameter_263] + del add_75, parameter_263 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_153, einsum_154, einsum_155 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_52, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_52 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_204, + split_205, + ) = einsum_154 + del einsum_154 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_206, + split_207, + ) = einsum_155 + del einsum_155 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_53 = [cast_5, einsum_153] + del einsum_153 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_156, einsum_157, einsum_158 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_53, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_53 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_208, + split_209, + ) = einsum_157 + del einsum_157 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_210, + split_211, + ) = einsum_158 + del einsum_158 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_76 = paddle._C_ops.add(einsum_147, index_select_8) + del einsum_147, index_select_8 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_77 = paddle._C_ops.add(add_76, einsum_156) + del add_76, einsum_156 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_12 = paddle._C_ops.scale(add_77, full_16, float("0"), True) + del add_77 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_8 = paddle._C_ops.subtract(scale_12, scale_4) + del scale_12 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_8 = paddle._C_ops.softmax(subtract_8, 3) + del subtract_8 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_54 = [dropout_68, reshape_58] + del dropout_68, reshape_58 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_159, einsum_160, einsum_161 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_54, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_54 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_212, + split_213, + ) = einsum_160 + del einsum_160 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_214, + split_215, + ) = einsum_161 + del einsum_161 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_62 = paddle._C_ops.reshape(einsum_159, full_int_array_10) + del einsum_159 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_55 = [reshape_62, parameter_268] + del parameter_268, reshape_62 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_162, einsum_163, einsum_164 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_55, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_55 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_216, + split_217, + ) = einsum_163 + del einsum_163 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_218, + split_219, + ) = einsum_164 + del einsum_164 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_162, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_162 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_78 = paddle._C_ops.add(dropout_70, layer_norm_45) + del dropout_70, layer_norm_45 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_78, parameter_262, parameter_261, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_78, parameter_261, parameter_262 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_52 = paddle._C_ops.matmul(layer_norm_48, parameter_258, False, False) + del parameter_258 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_79 = paddle._C_ops.add(matmul_52, parameter_257) + del matmul_52, parameter_257 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_8 = paddle._C_ops.relu(add_79) + del add_79 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_8 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_53 = paddle._C_ops.matmul(dropout_72, parameter_256, False, False) + del dropout_72, parameter_256 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_80 = paddle._C_ops.add(matmul_53, parameter_255) + del matmul_53, parameter_255 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_80, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_80 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_81 = paddle._C_ops.add(dropout_74, layer_norm_48) + del dropout_74, layer_norm_48 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_260, parameter_259, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_81, parameter_259, parameter_260 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_51, parameter_254, False, False) + del parameter_254 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(matmul_54, full_int_array_5) + del matmul_54 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_51, parameter_253, False, False) + del parameter_253 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(matmul_55, full_int_array_5) + del matmul_55 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_51, parameter_252, False, False) + del parameter_252 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(matmul_56, full_int_array_5) + del matmul_56 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_57 = paddle._C_ops.matmul(dropout_2, parameter_250, False, False) + del parameter_250 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(matmul_57, full_int_array_6) + del matmul_57 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_82 = paddle._C_ops.add(reshape_63, parameter_247) + del parameter_247 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_56 = [add_82, reshape_64] + del add_82, reshape_64 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_165, einsum_166, einsum_167 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_56, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_56 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_220, + split_221, + ) = einsum_166 + del einsum_166 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_222, + split_223, + ) = einsum_167 + del einsum_167 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_83 = paddle._C_ops.add(reshape_63, parameter_249) + del parameter_249 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_57 = [add_83, reshape_66] + del add_83, reshape_66 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_168, einsum_169, einsum_170 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_57, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_57 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_224, + split_225, + ) = einsum_169 + del einsum_169 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_226, + split_227, + ) = einsum_170 + del einsum_170 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(einsum_168, full_int_array_7) + del einsum_168 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + reshape_67, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_67 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(slice_9, full_int_array_9) + del slice_9 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_9 = paddle._C_ops.index_select(reshape_68, arange_2, 3) + del reshape_68 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_84 = paddle._C_ops.add(reshape_63, parameter_248) + del parameter_248, reshape_63 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_58 = [add_84, parameter_246] + del add_84, parameter_246 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_171, einsum_172, einsum_173 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_58, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_58 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_228, + split_229, + ) = einsum_172 + del einsum_172 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_230, + split_231, + ) = einsum_173 + del einsum_173 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_59 = [cast_5, einsum_171] + del einsum_171 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_174, einsum_175, einsum_176 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_59, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_59 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_232, + split_233, + ) = einsum_175 + del einsum_175 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_234, + split_235, + ) = einsum_176 + del einsum_176 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_85 = paddle._C_ops.add(einsum_165, index_select_9) + del einsum_165, index_select_9 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_86 = paddle._C_ops.add(add_85, einsum_174) + del add_85, einsum_174 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_13 = paddle._C_ops.scale(add_86, full_16, float("0"), True) + del add_86 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_9 = paddle._C_ops.subtract(scale_13, scale_4) + del scale_13 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_9 = paddle._C_ops.softmax(subtract_9, 3) + del subtract_9 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_60 = [dropout_76, reshape_65] + del dropout_76, reshape_65 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_177, einsum_178, einsum_179 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_60, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_60 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_236, + split_237, + ) = einsum_178 + del einsum_178 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_238, + split_239, + ) = einsum_179 + del einsum_179 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(einsum_177, full_int_array_10) + del einsum_177 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_61 = [reshape_69, parameter_251] + del parameter_251, reshape_69 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_180, einsum_181, einsum_182 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_61, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_61 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_240, + split_241, + ) = einsum_181 + del einsum_181 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_242, + split_243, + ) = einsum_182 + del einsum_182 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_180, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_180 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_87 = paddle._C_ops.add(dropout_78, layer_norm_51) + del dropout_78, layer_norm_51 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_245, parameter_244, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_87, parameter_244, parameter_245 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_54, parameter_241, False, False) + del parameter_241 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_88 = paddle._C_ops.add(matmul_58, parameter_240) + del matmul_58, parameter_240 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_9 = paddle._C_ops.relu(add_88) + del add_88 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_9 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_59 = paddle._C_ops.matmul(dropout_80, parameter_239, False, False) + del dropout_80, parameter_239 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_89 = paddle._C_ops.add(matmul_59, parameter_238) + del matmul_59, parameter_238 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_89, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_89 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_90 = paddle._C_ops.add(dropout_82, layer_norm_54) + del dropout_82, layer_norm_54 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_243, parameter_242, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_90, parameter_242, parameter_243 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_57, parameter_237, False, False) + del parameter_237 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(matmul_60, full_int_array_5) + del matmul_60 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_61 = paddle._C_ops.matmul(layer_norm_57, parameter_236, False, False) + del parameter_236 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(matmul_61, full_int_array_5) + del matmul_61 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_57, parameter_235, False, False) + del parameter_235 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_72 = paddle._C_ops.reshape(matmul_62, full_int_array_5) + del matmul_62 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_63 = paddle._C_ops.matmul(dropout_2, parameter_233, False, False) + del parameter_233 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(matmul_63, full_int_array_6) + del matmul_63 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_91 = paddle._C_ops.add(reshape_70, parameter_230) + del parameter_230 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_62 = [add_91, reshape_71] + del add_91, reshape_71 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_183, einsum_184, einsum_185 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_62, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_62 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_244, + split_245, + ) = einsum_184 + del einsum_184 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_246, + split_247, + ) = einsum_185 + del einsum_185 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_92 = paddle._C_ops.add(reshape_70, parameter_232) + del parameter_232 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_63 = [add_92, reshape_73] + del add_92, reshape_73 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_186, einsum_187, einsum_188 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_63, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_63 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_248, + split_249, + ) = einsum_187 + del einsum_187 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_250, + split_251, + ) = einsum_188 + del einsum_188 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(einsum_186, full_int_array_7) + del einsum_186 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + reshape_74, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_74 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(slice_10, full_int_array_9) + del slice_10 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_10 = paddle._C_ops.index_select(reshape_75, arange_2, 3) + del reshape_75 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_93 = paddle._C_ops.add(reshape_70, parameter_231) + del parameter_231, reshape_70 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_64 = [add_93, parameter_229] + del add_93, parameter_229 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_189, einsum_190, einsum_191 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_64, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_64 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_252, + split_253, + ) = einsum_190 + del einsum_190 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_254, + split_255, + ) = einsum_191 + del einsum_191 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_65 = [cast_5, einsum_189] + del einsum_189 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_192, einsum_193, einsum_194 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_65, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_65 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_256, + split_257, + ) = einsum_193 + del einsum_193 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_258, + split_259, + ) = einsum_194 + del einsum_194 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_94 = paddle._C_ops.add(einsum_183, index_select_10) + del einsum_183, index_select_10 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_95 = paddle._C_ops.add(add_94, einsum_192) + del add_94, einsum_192 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_14 = paddle._C_ops.scale(add_95, full_16, float("0"), True) + del add_95 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_10 = paddle._C_ops.subtract(scale_14, scale_4) + del scale_14 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_10 = paddle._C_ops.softmax(subtract_10, 3) + del subtract_10 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_66 = [dropout_84, reshape_72] + del dropout_84, reshape_72 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_195, einsum_196, einsum_197 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_66, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_66 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_260, + split_261, + ) = einsum_196 + del einsum_196 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_262, + split_263, + ) = einsum_197 + del einsum_197 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(einsum_195, full_int_array_10) + del einsum_195 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_67 = [reshape_76, parameter_234] + del parameter_234, reshape_76 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_198, einsum_199, einsum_200 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_67, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_67 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_264, + split_265, + ) = einsum_199 + del einsum_199 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_266, + split_267, + ) = einsum_200 + del einsum_200 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_198, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_198 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_96 = paddle._C_ops.add(dropout_86, layer_norm_57) + del dropout_86, layer_norm_57 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_96, parameter_228, parameter_227, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_96, parameter_227, parameter_228 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_60, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_97 = paddle._C_ops.add(matmul_64, parameter_223) + del matmul_64, parameter_223 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_10 = paddle._C_ops.relu(add_97) + del add_97 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_10 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_65 = paddle._C_ops.matmul(dropout_88, parameter_222, False, False) + del dropout_88, parameter_222 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_98 = paddle._C_ops.add(matmul_65, parameter_221) + del matmul_65, parameter_221 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_98, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_98 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_99 = paddle._C_ops.add(dropout_90, layer_norm_60) + del dropout_90, layer_norm_60 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_99, parameter_226, parameter_225, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_99, parameter_225, parameter_226 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_63, parameter_220, False, False) + del parameter_220 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(matmul_66, full_int_array_5) + del matmul_66 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_63, parameter_219, False, False) + del parameter_219 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(matmul_67, full_int_array_5) + del matmul_67 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_68 = paddle._C_ops.matmul(layer_norm_63, parameter_218, False, False) + del parameter_218 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(matmul_68, full_int_array_5) + del matmul_68 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_69 = paddle._C_ops.matmul(dropout_2, parameter_216, False, False) + del parameter_216 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_80 = paddle._C_ops.reshape(matmul_69, full_int_array_6) + del matmul_69 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_100 = paddle._C_ops.add(reshape_77, parameter_213) + del parameter_213 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_68 = [add_100, reshape_78] + del add_100, reshape_78 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_201, einsum_202, einsum_203 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_68, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_68 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_268, + split_269, + ) = einsum_202 + del einsum_202 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_270, + split_271, + ) = einsum_203 + del einsum_203 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_101 = paddle._C_ops.add(reshape_77, parameter_215) + del parameter_215 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_69 = [add_101, reshape_80] + del add_101, reshape_80 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_204, einsum_205, einsum_206 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_69, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_69 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_272, + split_273, + ) = einsum_205 + del einsum_205 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_274, + split_275, + ) = einsum_206 + del einsum_206 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(einsum_204, full_int_array_7) + del einsum_204 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + reshape_81, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_81 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(slice_11, full_int_array_9) + del slice_11 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_11 = paddle._C_ops.index_select(reshape_82, arange_2, 3) + del reshape_82 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_102 = paddle._C_ops.add(reshape_77, parameter_214) + del parameter_214, reshape_77 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_70 = [add_102, parameter_212] + del add_102, parameter_212 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_207, einsum_208, einsum_209 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_70, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_70 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_276, + split_277, + ) = einsum_208 + del einsum_208 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_278, + split_279, + ) = einsum_209 + del einsum_209 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_71 = [cast_5, einsum_207] + del einsum_207 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_210, einsum_211, einsum_212 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_71, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_71 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_280, + split_281, + ) = einsum_211 + del einsum_211 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_282, + split_283, + ) = einsum_212 + del einsum_212 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_103 = paddle._C_ops.add(einsum_201, index_select_11) + del einsum_201, index_select_11 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_104 = paddle._C_ops.add(add_103, einsum_210) + del add_103, einsum_210 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_15 = paddle._C_ops.scale(add_104, full_16, float("0"), True) + del add_104 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_11 = paddle._C_ops.subtract(scale_15, scale_4) + del scale_15 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_11 = paddle._C_ops.softmax(subtract_11, 3) + del subtract_11 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_72 = [dropout_92, reshape_79] + del dropout_92, reshape_79 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_213, einsum_214, einsum_215 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_72, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_72 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_284, + split_285, + ) = einsum_214 + del einsum_214 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_286, + split_287, + ) = einsum_215 + del einsum_215 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(einsum_213, full_int_array_10) + del einsum_213 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_73 = [reshape_83, parameter_217] + del parameter_217, reshape_83 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_216, einsum_217, einsum_218 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_73, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_73 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_288, + split_289, + ) = einsum_217 + del einsum_217 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_290, + split_291, + ) = einsum_218 + del einsum_218 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_216, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_216 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_105 = paddle._C_ops.add(dropout_94, layer_norm_63) + del dropout_94, layer_norm_63 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_105, parameter_211, parameter_210, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_105, parameter_210, parameter_211 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_66, parameter_207, False, False) + del parameter_207 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_106 = paddle._C_ops.add(matmul_70, parameter_206) + del matmul_70, parameter_206 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_11 = paddle._C_ops.relu(add_106) + del add_106 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_11 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_71 = paddle._C_ops.matmul(dropout_96, parameter_205, False, False) + del dropout_96, parameter_205 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_107 = paddle._C_ops.add(matmul_71, parameter_204) + del matmul_71, parameter_204 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_107, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_107 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_108 = paddle._C_ops.add(dropout_98, layer_norm_66) + del dropout_98, layer_norm_66 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_108, parameter_209, parameter_208, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_108, parameter_208, parameter_209 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_69, parameter_203, False, False) + del parameter_203 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_84 = paddle._C_ops.reshape(matmul_72, full_int_array_5) + del matmul_72 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_69, parameter_202, False, False) + del parameter_202 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_85 = paddle._C_ops.reshape(matmul_73, full_int_array_5) + del matmul_73 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_69, parameter_201, False, False) + del parameter_201 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_86 = paddle._C_ops.reshape(matmul_74, full_int_array_5) + del matmul_74 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_75 = paddle._C_ops.matmul(dropout_2, parameter_199, False, False) + del parameter_199 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(matmul_75, full_int_array_6) + del matmul_75 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_109 = paddle._C_ops.add(reshape_84, parameter_196) + del parameter_196 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_74 = [add_109, reshape_85] + del add_109, reshape_85 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_219, einsum_220, einsum_221 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_74, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_74 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_292, + split_293, + ) = einsum_220 + del einsum_220 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_294, + split_295, + ) = einsum_221 + del einsum_221 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_110 = paddle._C_ops.add(reshape_84, parameter_198) + del parameter_198 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_75 = [add_110, reshape_87] + del add_110, reshape_87 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_222, einsum_223, einsum_224 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_75, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_75 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_296, + split_297, + ) = einsum_223 + del einsum_223 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_298, + split_299, + ) = einsum_224 + del einsum_224 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(einsum_222, full_int_array_7) + del einsum_222 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + reshape_88, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_88 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_89 = paddle._C_ops.reshape(slice_12, full_int_array_9) + del slice_12 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_12 = paddle._C_ops.index_select(reshape_89, arange_2, 3) + del reshape_89 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_111 = paddle._C_ops.add(reshape_84, parameter_197) + del parameter_197, reshape_84 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_76 = [add_111, parameter_195] + del add_111, parameter_195 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_225, einsum_226, einsum_227 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_76, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_76 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_300, + split_301, + ) = einsum_226 + del einsum_226 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_302, + split_303, + ) = einsum_227 + del einsum_227 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_77 = [cast_5, einsum_225] + del einsum_225 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_228, einsum_229, einsum_230 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_77, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_77 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_304, + split_305, + ) = einsum_229 + del einsum_229 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_306, + split_307, + ) = einsum_230 + del einsum_230 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_112 = paddle._C_ops.add(einsum_219, index_select_12) + del einsum_219, index_select_12 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_113 = paddle._C_ops.add(add_112, einsum_228) + del add_112, einsum_228 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_16 = paddle._C_ops.scale(add_113, full_16, float("0"), True) + del add_113 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_12 = paddle._C_ops.subtract(scale_16, scale_4) + del scale_16 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_12 = paddle._C_ops.softmax(subtract_12, 3) + del subtract_12 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_12, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_12 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_78 = [dropout_100, reshape_86] + del dropout_100, reshape_86 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_231, einsum_232, einsum_233 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_78, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_78 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_308, + split_309, + ) = einsum_232 + del einsum_232 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_310, + split_311, + ) = einsum_233 + del einsum_233 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_90 = paddle._C_ops.reshape(einsum_231, full_int_array_10) + del einsum_231 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_79 = [reshape_90, parameter_200] + del parameter_200, reshape_90 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_234, einsum_235, einsum_236 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_79, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_79 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_312, + split_313, + ) = einsum_235 + del einsum_235 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_314, + split_315, + ) = einsum_236 + del einsum_236 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_102, dropout_103 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_234, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_234 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_114 = paddle._C_ops.add(dropout_102, layer_norm_69) + del dropout_102, layer_norm_69 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_114, parameter_194, parameter_193, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_114, parameter_193, parameter_194 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_76 = paddle._C_ops.matmul(layer_norm_72, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_115 = paddle._C_ops.add(matmul_76, parameter_189) + del matmul_76, parameter_189 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_12 = paddle._C_ops.relu(add_115) + del add_115 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_104, dropout_105 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_12, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_12 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_77 = paddle._C_ops.matmul(dropout_104, parameter_188, False, False) + del dropout_104, parameter_188 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_116 = paddle._C_ops.add(matmul_77, parameter_187) + del matmul_77, parameter_187 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_106, dropout_107 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_116, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_116 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_117 = paddle._C_ops.add(dropout_106, layer_norm_72) + del dropout_106, layer_norm_72 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_117, parameter_192, parameter_191, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_117, parameter_191, parameter_192 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_75, parameter_186, False, False) + del parameter_186 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(matmul_78, full_int_array_5) + del matmul_78 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_75, parameter_185, False, False) + del parameter_185 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(matmul_79, full_int_array_5) + del matmul_79 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_75, parameter_184, False, False) + del parameter_184 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_93 = paddle._C_ops.reshape(matmul_80, full_int_array_5) + del matmul_80 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_81 = paddle._C_ops.matmul(dropout_2, parameter_182, False, False) + del parameter_182 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(matmul_81, full_int_array_6) + del matmul_81 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_118 = paddle._C_ops.add(reshape_91, parameter_179) + del parameter_179 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_80 = [add_118, reshape_92] + del add_118, reshape_92 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_237, einsum_238, einsum_239 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_80, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_80 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_316, + split_317, + ) = einsum_238 + del einsum_238 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_318, + split_319, + ) = einsum_239 + del einsum_239 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_119 = paddle._C_ops.add(reshape_91, parameter_181) + del parameter_181 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_81 = [add_119, reshape_94] + del add_119, reshape_94 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_240, einsum_241, einsum_242 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_81, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_81 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_320, + split_321, + ) = einsum_241 + del einsum_241 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_322, + split_323, + ) = einsum_242 + del einsum_242 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(einsum_240, full_int_array_7) + del einsum_240 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + reshape_95, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_95 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(slice_13, full_int_array_9) + del slice_13 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_13 = paddle._C_ops.index_select(reshape_96, arange_2, 3) + del reshape_96 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_120 = paddle._C_ops.add(reshape_91, parameter_180) + del parameter_180, reshape_91 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_82 = [add_120, parameter_178] + del add_120, parameter_178 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_243, einsum_244, einsum_245 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_82, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_82 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_324, + split_325, + ) = einsum_244 + del einsum_244 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_326, + split_327, + ) = einsum_245 + del einsum_245 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_83 = [cast_5, einsum_243] + del einsum_243 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_246, einsum_247, einsum_248 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_83, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_83 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_328, + split_329, + ) = einsum_247 + del einsum_247 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_330, + split_331, + ) = einsum_248 + del einsum_248 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_121 = paddle._C_ops.add(einsum_237, index_select_13) + del einsum_237, index_select_13 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_122 = paddle._C_ops.add(add_121, einsum_246) + del add_121, einsum_246 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_17 = paddle._C_ops.scale(add_122, full_16, float("0"), True) + del add_122 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_13 = paddle._C_ops.subtract(scale_17, scale_4) + del scale_17 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_13 = paddle._C_ops.softmax(subtract_13, 3) + del subtract_13 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_108, dropout_109 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_13 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_84 = [dropout_108, reshape_93] + del dropout_108, reshape_93 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_249, einsum_250, einsum_251 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_84, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_84 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_332, + split_333, + ) = einsum_250 + del einsum_250 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_334, + split_335, + ) = einsum_251 + del einsum_251 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(einsum_249, full_int_array_10) + del einsum_249 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_85 = [reshape_97, parameter_183] + del parameter_183, reshape_97 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_252, einsum_253, einsum_254 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_85, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_85 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_336, + split_337, + ) = einsum_253 + del einsum_253 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_338, + split_339, + ) = einsum_254 + del einsum_254 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_110, dropout_111 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_252, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_252 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_123 = paddle._C_ops.add(dropout_110, layer_norm_75) + del dropout_110, layer_norm_75 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_177, parameter_176, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_123, parameter_176, parameter_177 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_78, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_124 = paddle._C_ops.add(matmul_82, parameter_172) + del matmul_82, parameter_172 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_13 = paddle._C_ops.relu(add_124) + del add_124 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_112, dropout_113 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_13 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_83 = paddle._C_ops.matmul(dropout_112, parameter_171, False, False) + del dropout_112, parameter_171 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_125 = paddle._C_ops.add(matmul_83, parameter_170) + del matmul_83, parameter_170 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_114, dropout_115 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_125, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_125 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_126 = paddle._C_ops.add(dropout_114, layer_norm_78) + del dropout_114, layer_norm_78 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_175, parameter_174, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_126, parameter_174, parameter_175 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_81, parameter_169, False, False) + del parameter_169 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(matmul_84, full_int_array_5) + del matmul_84 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_81, parameter_168, False, False) + del parameter_168 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(matmul_85, full_int_array_5) + del matmul_85 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_86 = paddle._C_ops.matmul(layer_norm_81, parameter_167, False, False) + del parameter_167 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_100 = paddle._C_ops.reshape(matmul_86, full_int_array_5) + del matmul_86 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_87 = paddle._C_ops.matmul(dropout_2, parameter_165, False, False) + del parameter_165 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_101 = paddle._C_ops.reshape(matmul_87, full_int_array_6) + del matmul_87 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_127 = paddle._C_ops.add(reshape_98, parameter_162) + del parameter_162 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_86 = [add_127, reshape_99] + del add_127, reshape_99 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_255, einsum_256, einsum_257 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_86, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_86 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_340, + split_341, + ) = einsum_256 + del einsum_256 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_342, + split_343, + ) = einsum_257 + del einsum_257 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_128 = paddle._C_ops.add(reshape_98, parameter_164) + del parameter_164 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_87 = [add_128, reshape_101] + del add_128, reshape_101 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_258, einsum_259, einsum_260 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_87, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_87 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_344, + split_345, + ) = einsum_259 + del einsum_259 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_346, + split_347, + ) = einsum_260 + del einsum_260 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(einsum_258, full_int_array_7) + del einsum_258 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + reshape_102, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_102 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_103 = paddle._C_ops.reshape(slice_14, full_int_array_9) + del slice_14 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_14 = paddle._C_ops.index_select(reshape_103, arange_2, 3) + del reshape_103 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_129 = paddle._C_ops.add(reshape_98, parameter_163) + del parameter_163, reshape_98 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_88 = [add_129, parameter_161] + del add_129, parameter_161 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_261, einsum_262, einsum_263 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_88, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_88 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_348, + split_349, + ) = einsum_262 + del einsum_262 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_350, + split_351, + ) = einsum_263 + del einsum_263 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_89 = [cast_5, einsum_261] + del einsum_261 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_264, einsum_265, einsum_266 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_89, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_89 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_352, + split_353, + ) = einsum_265 + del einsum_265 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_354, + split_355, + ) = einsum_266 + del einsum_266 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_130 = paddle._C_ops.add(einsum_255, index_select_14) + del einsum_255, index_select_14 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_131 = paddle._C_ops.add(add_130, einsum_264) + del add_130, einsum_264 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_18 = paddle._C_ops.scale(add_131, full_16, float("0"), True) + del add_131 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_14 = paddle._C_ops.subtract(scale_18, scale_4) + del scale_18 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_14 = paddle._C_ops.softmax(subtract_14, 3) + del subtract_14 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_116, dropout_117 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_14, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_14 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_90 = [dropout_116, reshape_100] + del dropout_116, reshape_100 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_267, einsum_268, einsum_269 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_90, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_90 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_356, + split_357, + ) = einsum_268 + del einsum_268 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_358, + split_359, + ) = einsum_269 + del einsum_269 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_104 = paddle._C_ops.reshape(einsum_267, full_int_array_10) + del einsum_267 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_91 = [reshape_104, parameter_166] + del parameter_166, reshape_104 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_270, einsum_271, einsum_272 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_91, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_91 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_360, + split_361, + ) = einsum_271 + del einsum_271 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_362, + split_363, + ) = einsum_272 + del einsum_272 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_118, dropout_119 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_270, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_270 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_132 = paddle._C_ops.add(dropout_118, layer_norm_81) + del dropout_118, layer_norm_81 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_160, parameter_159, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_132, parameter_159, parameter_160 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_88 = paddle._C_ops.matmul(layer_norm_84, parameter_156, False, False) + del parameter_156 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_133 = paddle._C_ops.add(matmul_88, parameter_155) + del matmul_88, parameter_155 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_14 = paddle._C_ops.relu(add_133) + del add_133 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_120, dropout_121 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_14, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_14 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_89 = paddle._C_ops.matmul(dropout_120, parameter_154, False, False) + del dropout_120, parameter_154 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_134 = paddle._C_ops.add(matmul_89, parameter_153) + del matmul_89, parameter_153 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_122, dropout_123 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_134, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_134 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_135 = paddle._C_ops.add(dropout_122, layer_norm_84) + del dropout_122, layer_norm_84 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_135, parameter_158, parameter_157, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_135, parameter_157, parameter_158 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_87, parameter_152, False, False) + del parameter_152 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(matmul_90, full_int_array_5) + del matmul_90 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_87, parameter_151, False, False) + del parameter_151 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(matmul_91, full_int_array_5) + del matmul_91 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_92 = paddle._C_ops.matmul(layer_norm_87, parameter_150, False, False) + del parameter_150 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(matmul_92, full_int_array_5) + del matmul_92 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_93 = paddle._C_ops.matmul(dropout_2, parameter_148, False, False) + del parameter_148 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(matmul_93, full_int_array_6) + del matmul_93 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_136 = paddle._C_ops.add(reshape_105, parameter_145) + del parameter_145 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_92 = [add_136, reshape_106] + del add_136, reshape_106 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_273, einsum_274, einsum_275 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_92, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_92 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_364, + split_365, + ) = einsum_274 + del einsum_274 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_366, + split_367, + ) = einsum_275 + del einsum_275 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_137 = paddle._C_ops.add(reshape_105, parameter_147) + del parameter_147 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_93 = [add_137, reshape_108] + del add_137, reshape_108 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_276, einsum_277, einsum_278 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_93, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_93 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_368, + split_369, + ) = einsum_277 + del einsum_277 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_370, + split_371, + ) = einsum_278 + del einsum_278 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_109 = paddle._C_ops.reshape(einsum_276, full_int_array_7) + del einsum_276 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + reshape_109, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_109 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_110 = paddle._C_ops.reshape(slice_15, full_int_array_9) + del slice_15 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_15 = paddle._C_ops.index_select(reshape_110, arange_2, 3) + del reshape_110 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_138 = paddle._C_ops.add(reshape_105, parameter_146) + del parameter_146, reshape_105 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_94 = [add_138, parameter_144] + del add_138, parameter_144 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_279, einsum_280, einsum_281 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_94, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_94 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_372, + split_373, + ) = einsum_280 + del einsum_280 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_374, + split_375, + ) = einsum_281 + del einsum_281 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_95 = [cast_5, einsum_279] + del einsum_279 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_282, einsum_283, einsum_284 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_95, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_95 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_376, + split_377, + ) = einsum_283 + del einsum_283 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_378, + split_379, + ) = einsum_284 + del einsum_284 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_139 = paddle._C_ops.add(einsum_273, index_select_15) + del einsum_273, index_select_15 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_140 = paddle._C_ops.add(add_139, einsum_282) + del add_139, einsum_282 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_19 = paddle._C_ops.scale(add_140, full_16, float("0"), True) + del add_140 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_15 = paddle._C_ops.subtract(scale_19, scale_4) + del scale_19 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_15 = paddle._C_ops.softmax(subtract_15, 3) + del subtract_15 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_124, dropout_125 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_15 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_96 = [dropout_124, reshape_107] + del dropout_124, reshape_107 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_285, einsum_286, einsum_287 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_96, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_96 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_380, + split_381, + ) = einsum_286 + del einsum_286 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_382, + split_383, + ) = einsum_287 + del einsum_287 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(einsum_285, full_int_array_10) + del einsum_285 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_97 = [reshape_111, parameter_149] + del parameter_149, reshape_111 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_288, einsum_289, einsum_290 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_97, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_97 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_384, + split_385, + ) = einsum_289 + del einsum_289 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_386, + split_387, + ) = einsum_290 + del einsum_290 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_126, dropout_127 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_288, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_288 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_141 = paddle._C_ops.add(dropout_126, layer_norm_87) + del dropout_126, layer_norm_87 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_141, parameter_143, parameter_142, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_141, parameter_142, parameter_143 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_94 = paddle._C_ops.matmul(layer_norm_90, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_142 = paddle._C_ops.add(matmul_94, parameter_138) + del matmul_94, parameter_138 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_15 = paddle._C_ops.relu(add_142) + del add_142 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_128, dropout_129 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_15 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_95 = paddle._C_ops.matmul(dropout_128, parameter_137, False, False) + del dropout_128, parameter_137 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_143 = paddle._C_ops.add(matmul_95, parameter_136) + del matmul_95, parameter_136 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_130, dropout_131 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_143, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_143 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_144 = paddle._C_ops.add(dropout_130, layer_norm_90) + del dropout_130, layer_norm_90 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_144, parameter_141, parameter_140, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_144, parameter_140, parameter_141 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_93, parameter_135, False, False) + del parameter_135 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(matmul_96, full_int_array_5) + del matmul_96 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_97 = paddle._C_ops.matmul(layer_norm_93, parameter_134, False, False) + del parameter_134 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(matmul_97, full_int_array_5) + del matmul_97 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_98 = paddle._C_ops.matmul(layer_norm_93, parameter_133, False, False) + del parameter_133 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_114 = paddle._C_ops.reshape(matmul_98, full_int_array_5) + del matmul_98 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_99 = paddle._C_ops.matmul(dropout_2, parameter_131, False, False) + del parameter_131 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(matmul_99, full_int_array_6) + del matmul_99 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_145 = paddle._C_ops.add(reshape_112, parameter_128) + del parameter_128 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_98 = [add_145, reshape_113] + del add_145, reshape_113 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_291, einsum_292, einsum_293 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_98, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_98 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_388, + split_389, + ) = einsum_292 + del einsum_292 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_390, + split_391, + ) = einsum_293 + del einsum_293 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_146 = paddle._C_ops.add(reshape_112, parameter_130) + del parameter_130 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_99 = [add_146, reshape_115] + del add_146, reshape_115 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_294, einsum_295, einsum_296 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_99, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_99 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_392, + split_393, + ) = einsum_295 + del einsum_295 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_394, + split_395, + ) = einsum_296 + del einsum_296 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(einsum_294, full_int_array_7) + del einsum_294 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + reshape_116, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_116 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(slice_16, full_int_array_9) + del slice_16 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_16 = paddle._C_ops.index_select(reshape_117, arange_2, 3) + del reshape_117 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_147 = paddle._C_ops.add(reshape_112, parameter_129) + del parameter_129, reshape_112 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_100 = [add_147, parameter_127] + del add_147, parameter_127 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_297, einsum_298, einsum_299 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_100, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_100 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_396, + split_397, + ) = einsum_298 + del einsum_298 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_398, + split_399, + ) = einsum_299 + del einsum_299 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_101 = [cast_5, einsum_297] + del einsum_297 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_300, einsum_301, einsum_302 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_101, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_101 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_400, + split_401, + ) = einsum_301 + del einsum_301 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_402, + split_403, + ) = einsum_302 + del einsum_302 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_148 = paddle._C_ops.add(einsum_291, index_select_16) + del einsum_291, index_select_16 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_149 = paddle._C_ops.add(add_148, einsum_300) + del add_148, einsum_300 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_20 = paddle._C_ops.scale(add_149, full_16, float("0"), True) + del add_149 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_16 = paddle._C_ops.subtract(scale_20, scale_4) + del scale_20 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_16 = paddle._C_ops.softmax(subtract_16, 3) + del subtract_16 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_132, dropout_133 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_16, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_16 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_102 = [dropout_132, reshape_114] + del dropout_132, reshape_114 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_303, einsum_304, einsum_305 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_102, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_102 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_404, + split_405, + ) = einsum_304 + del einsum_304 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_406, + split_407, + ) = einsum_305 + del einsum_305 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(einsum_303, full_int_array_10) + del einsum_303 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_103 = [reshape_118, parameter_132] + del parameter_132, reshape_118 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_306, einsum_307, einsum_308 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_103, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_103 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_408, + split_409, + ) = einsum_307 + del einsum_307 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_410, + split_411, + ) = einsum_308 + del einsum_308 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_134, dropout_135 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_306, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_306 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_150 = paddle._C_ops.add(dropout_134, layer_norm_93) + del dropout_134, layer_norm_93 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_150, parameter_126, parameter_125, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_150, parameter_125, parameter_126 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_96, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_151 = paddle._C_ops.add(matmul_100, parameter_121) + del matmul_100, parameter_121 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_16 = paddle._C_ops.relu(add_151) + del add_151 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_136, dropout_137 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_16, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_16 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_101 = paddle._C_ops.matmul(dropout_136, parameter_120, False, False) + del dropout_136, parameter_120 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_152 = paddle._C_ops.add(matmul_101, parameter_119) + del matmul_101, parameter_119 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_138, dropout_139 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_152, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_152 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_153 = paddle._C_ops.add(dropout_138, layer_norm_96) + del dropout_138, layer_norm_96 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_153, parameter_124, parameter_123, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_153, parameter_123, parameter_124 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_99, parameter_118, False, False) + del parameter_118 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(matmul_102, full_int_array_5) + del matmul_102 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_103 = paddle._C_ops.matmul(layer_norm_99, parameter_117, False, False) + del parameter_117 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_120 = paddle._C_ops.reshape(matmul_103, full_int_array_5) + del matmul_103 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_104 = paddle._C_ops.matmul(layer_norm_99, parameter_116, False, False) + del parameter_116 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(matmul_104, full_int_array_5) + del matmul_104 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_105 = paddle._C_ops.matmul(dropout_2, parameter_114, False, False) + del parameter_114 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(matmul_105, full_int_array_6) + del matmul_105 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_154 = paddle._C_ops.add(reshape_119, parameter_111) + del parameter_111 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_104 = [add_154, reshape_120] + del add_154, reshape_120 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_309, einsum_310, einsum_311 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_104, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_104 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_412, + split_413, + ) = einsum_310 + del einsum_310 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_414, + split_415, + ) = einsum_311 + del einsum_311 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_155 = paddle._C_ops.add(reshape_119, parameter_113) + del parameter_113 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_105 = [add_155, reshape_122] + del add_155, reshape_122 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_312, einsum_313, einsum_314 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_105, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_105 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_416, + split_417, + ) = einsum_313 + del einsum_313 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_418, + split_419, + ) = einsum_314 + del einsum_314 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(einsum_312, full_int_array_7) + del einsum_312 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + reshape_123, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_123 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(slice_17, full_int_array_9) + del slice_17 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_17 = paddle._C_ops.index_select(reshape_124, arange_2, 3) + del reshape_124 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_156 = paddle._C_ops.add(reshape_119, parameter_112) + del parameter_112, reshape_119 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_106 = [add_156, parameter_110] + del add_156, parameter_110 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_315, einsum_316, einsum_317 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_106, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_106 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_420, + split_421, + ) = einsum_316 + del einsum_316 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_422, + split_423, + ) = einsum_317 + del einsum_317 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_107 = [cast_5, einsum_315] + del einsum_315 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_318, einsum_319, einsum_320 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_107, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_107 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_424, + split_425, + ) = einsum_319 + del einsum_319 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_426, + split_427, + ) = einsum_320 + del einsum_320 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_157 = paddle._C_ops.add(einsum_309, index_select_17) + del einsum_309, index_select_17 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_158 = paddle._C_ops.add(add_157, einsum_318) + del add_157, einsum_318 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_21 = paddle._C_ops.scale(add_158, full_16, float("0"), True) + del add_158 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_17 = paddle._C_ops.subtract(scale_21, scale_4) + del scale_21 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_17 = paddle._C_ops.softmax(subtract_17, 3) + del subtract_17 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_140, dropout_141 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_17 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_108 = [dropout_140, reshape_121] + del dropout_140, reshape_121 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_321, einsum_322, einsum_323 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_108, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_108 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_428, + split_429, + ) = einsum_322 + del einsum_322 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_430, + split_431, + ) = einsum_323 + del einsum_323 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_125 = paddle._C_ops.reshape(einsum_321, full_int_array_10) + del einsum_321 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_109 = [reshape_125, parameter_115] + del parameter_115, reshape_125 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_324, einsum_325, einsum_326 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_109, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_109 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_432, + split_433, + ) = einsum_325 + del einsum_325 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_434, + split_435, + ) = einsum_326 + del einsum_326 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_142, dropout_143 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_324, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_324 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_159 = paddle._C_ops.add(dropout_142, layer_norm_99) + del dropout_142, layer_norm_99 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_159, parameter_109, parameter_108, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_159, parameter_108, parameter_109 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_106 = paddle._C_ops.matmul(layer_norm_102, parameter_105, False, False) + del parameter_105 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_160 = paddle._C_ops.add(matmul_106, parameter_104) + del matmul_106, parameter_104 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_17 = paddle._C_ops.relu(add_160) + del add_160 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_144, dropout_145 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_17 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_107 = paddle._C_ops.matmul(dropout_144, parameter_103, False, False) + del dropout_144, parameter_103 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_161 = paddle._C_ops.add(matmul_107, parameter_102) + del matmul_107, parameter_102 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_146, dropout_147 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_161, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_161 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_162 = paddle._C_ops.add(dropout_146, layer_norm_102) + del dropout_146, layer_norm_102 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_162, parameter_107, parameter_106, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_162, parameter_106, parameter_107 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_108 = paddle._C_ops.matmul(layer_norm_105, parameter_101, False, False) + del parameter_101 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_126 = paddle._C_ops.reshape(matmul_108, full_int_array_5) + del matmul_108 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_109 = paddle._C_ops.matmul(layer_norm_105, parameter_100, False, False) + del parameter_100 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_127 = paddle._C_ops.reshape(matmul_109, full_int_array_5) + del matmul_109 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_105, parameter_99, False, False) + del parameter_99 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_128 = paddle._C_ops.reshape(matmul_110, full_int_array_5) + del matmul_110 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_111 = paddle._C_ops.matmul(dropout_2, parameter_97, False, False) + del parameter_97 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(matmul_111, full_int_array_6) + del matmul_111 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_163 = paddle._C_ops.add(reshape_126, parameter_94) + del parameter_94 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_110 = [add_163, reshape_127] + del add_163, reshape_127 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_327, einsum_328, einsum_329 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_110, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_110 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_436, + split_437, + ) = einsum_328 + del einsum_328 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_438, + split_439, + ) = einsum_329 + del einsum_329 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_164 = paddle._C_ops.add(reshape_126, parameter_96) + del parameter_96 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_111 = [add_164, reshape_129] + del add_164, reshape_129 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_330, einsum_331, einsum_332 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_111, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_111 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_440, + split_441, + ) = einsum_331 + del einsum_331 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_442, + split_443, + ) = einsum_332 + del einsum_332 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(einsum_330, full_int_array_7) + del einsum_330 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + reshape_130, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_130 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(slice_18, full_int_array_9) + del slice_18 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_18 = paddle._C_ops.index_select(reshape_131, arange_2, 3) + del reshape_131 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_165 = paddle._C_ops.add(reshape_126, parameter_95) + del parameter_95, reshape_126 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_112 = [add_165, parameter_93] + del add_165, parameter_93 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_333, einsum_334, einsum_335 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_112, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_112 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_444, + split_445, + ) = einsum_334 + del einsum_334 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_446, + split_447, + ) = einsum_335 + del einsum_335 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_113 = [cast_5, einsum_333] + del einsum_333 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_336, einsum_337, einsum_338 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_113, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_113 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_448, + split_449, + ) = einsum_337 + del einsum_337 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_450, + split_451, + ) = einsum_338 + del einsum_338 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_166 = paddle._C_ops.add(einsum_327, index_select_18) + del einsum_327, index_select_18 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_167 = paddle._C_ops.add(add_166, einsum_336) + del add_166, einsum_336 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_22 = paddle._C_ops.scale(add_167, full_16, float("0"), True) + del add_167 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_18 = paddle._C_ops.subtract(scale_22, scale_4) + del scale_22 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_18 = paddle._C_ops.softmax(subtract_18, 3) + del subtract_18 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_148, dropout_149 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_18 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_114 = [dropout_148, reshape_128] + del dropout_148, reshape_128 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_339, einsum_340, einsum_341 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_114, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_114 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_452, + split_453, + ) = einsum_340 + del einsum_340 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_454, + split_455, + ) = einsum_341 + del einsum_341 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_132 = paddle._C_ops.reshape(einsum_339, full_int_array_10) + del einsum_339 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_115 = [reshape_132, parameter_98] + del parameter_98, reshape_132 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_342, einsum_343, einsum_344 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_115, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_115 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_456, + split_457, + ) = einsum_343 + del einsum_343 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_458, + split_459, + ) = einsum_344 + del einsum_344 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_150, dropout_151 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_342, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_342 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_168 = paddle._C_ops.add(dropout_150, layer_norm_105) + del dropout_150, layer_norm_105 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_92, parameter_91, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_168, parameter_91, parameter_92 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_108, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_169 = paddle._C_ops.add(matmul_112, parameter_87) + del matmul_112, parameter_87 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_18 = paddle._C_ops.relu(add_169) + del add_169 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_152, dropout_153 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_18 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_113 = paddle._C_ops.matmul(dropout_152, parameter_86, False, False) + del dropout_152, parameter_86 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_170 = paddle._C_ops.add(matmul_113, parameter_85) + del matmul_113, parameter_85 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_154, dropout_155 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_170, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_170 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_171 = paddle._C_ops.add(dropout_154, layer_norm_108) + del dropout_154, layer_norm_108 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_171, parameter_90, parameter_89, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_171, parameter_89, parameter_90 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_111, parameter_84, False, False) + del parameter_84 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(matmul_114, full_int_array_5) + del matmul_114 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_115 = paddle._C_ops.matmul(layer_norm_111, parameter_83, False, False) + del parameter_83 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_134 = paddle._C_ops.reshape(matmul_115, full_int_array_5) + del matmul_115 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_111, parameter_82, False, False) + del parameter_82 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(matmul_116, full_int_array_5) + del matmul_116 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_117 = paddle._C_ops.matmul(dropout_2, parameter_80, False, False) + del parameter_80 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_136 = paddle._C_ops.reshape(matmul_117, full_int_array_6) + del matmul_117 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_172 = paddle._C_ops.add(reshape_133, parameter_77) + del parameter_77 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_116 = [add_172, reshape_134] + del add_172, reshape_134 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_345, einsum_346, einsum_347 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_116, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_116 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_460, + split_461, + ) = einsum_346 + del einsum_346 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_462, + split_463, + ) = einsum_347 + del einsum_347 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_173 = paddle._C_ops.add(reshape_133, parameter_79) + del parameter_79 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_117 = [add_173, reshape_136] + del add_173, reshape_136 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_348, einsum_349, einsum_350 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_117, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_117 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_464, + split_465, + ) = einsum_349 + del einsum_349 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_466, + split_467, + ) = einsum_350 + del einsum_350 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(einsum_348, full_int_array_7) + del einsum_348 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + reshape_137, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_137 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(slice_19, full_int_array_9) + del slice_19 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_19 = paddle._C_ops.index_select(reshape_138, arange_2, 3) + del reshape_138 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_174 = paddle._C_ops.add(reshape_133, parameter_78) + del parameter_78, reshape_133 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_118 = [add_174, parameter_76] + del add_174, parameter_76 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_351, einsum_352, einsum_353 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_118, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_118 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_468, + split_469, + ) = einsum_352 + del einsum_352 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_470, + split_471, + ) = einsum_353 + del einsum_353 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_119 = [cast_5, einsum_351] + del einsum_351 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_354, einsum_355, einsum_356 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_119, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_119 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_472, + split_473, + ) = einsum_355 + del einsum_355 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_474, + split_475, + ) = einsum_356 + del einsum_356 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_175 = paddle._C_ops.add(einsum_345, index_select_19) + del einsum_345, index_select_19 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_176 = paddle._C_ops.add(add_175, einsum_354) + del add_175, einsum_354 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_23 = paddle._C_ops.scale(add_176, full_16, float("0"), True) + del add_176 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_19 = paddle._C_ops.subtract(scale_23, scale_4) + del scale_23 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_19 = paddle._C_ops.softmax(subtract_19, 3) + del subtract_19 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_156, dropout_157 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_19, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_19 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_120 = [dropout_156, reshape_135] + del dropout_156, reshape_135 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_357, einsum_358, einsum_359 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_120, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_120 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_476, + split_477, + ) = einsum_358 + del einsum_358 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_478, + split_479, + ) = einsum_359 + del einsum_359 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_139 = paddle._C_ops.reshape(einsum_357, full_int_array_10) + del einsum_357 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_121 = [reshape_139, parameter_81] + del parameter_81, reshape_139 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_360, einsum_361, einsum_362 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_121, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_121 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_480, + split_481, + ) = einsum_361 + del einsum_361 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_482, + split_483, + ) = einsum_362 + del einsum_362 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_158, dropout_159 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_360, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_360 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_177 = paddle._C_ops.add(dropout_158, layer_norm_111) + del dropout_158, layer_norm_111 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_177, parameter_75, parameter_74, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_177, parameter_74, parameter_75 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_118 = paddle._C_ops.matmul(layer_norm_114, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_178 = paddle._C_ops.add(matmul_118, parameter_70) + del matmul_118, parameter_70 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_19 = paddle._C_ops.relu(add_178) + del add_178 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_160, dropout_161 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_19, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_19 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_119 = paddle._C_ops.matmul(dropout_160, parameter_69, False, False) + del dropout_160, parameter_69 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_179 = paddle._C_ops.add(matmul_119, parameter_68) + del matmul_119, parameter_68 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_162, dropout_163 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_179, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_179 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_180 = paddle._C_ops.add(dropout_162, layer_norm_114) + del dropout_162, layer_norm_114 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_180, parameter_73, parameter_72, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_180, parameter_72, parameter_73 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_117, parameter_67, False, False) + del parameter_67 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(matmul_120, full_int_array_5) + del matmul_120 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_117, parameter_66, False, False) + del parameter_66 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(matmul_121, full_int_array_5) + del matmul_121 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_122 = paddle._C_ops.matmul(layer_norm_117, parameter_65, False, False) + del parameter_65 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(matmul_122, full_int_array_5) + del matmul_122 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_123 = paddle._C_ops.matmul(dropout_2, parameter_63, False, False) + del parameter_63 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_143 = paddle._C_ops.reshape(matmul_123, full_int_array_6) + del matmul_123 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_181 = paddle._C_ops.add(reshape_140, parameter_60) + del parameter_60 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_122 = [add_181, reshape_141] + del add_181, reshape_141 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_363, einsum_364, einsum_365 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_122, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_122 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_484, + split_485, + ) = einsum_364 + del einsum_364 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_486, + split_487, + ) = einsum_365 + del einsum_365 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_182 = paddle._C_ops.add(reshape_140, parameter_62) + del parameter_62 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_123 = [add_182, reshape_143] + del add_182, reshape_143 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_366, einsum_367, einsum_368 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_123, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_123 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_488, + split_489, + ) = einsum_367 + del einsum_367 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_490, + split_491, + ) = einsum_368 + del einsum_368 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_144 = paddle._C_ops.reshape(einsum_366, full_int_array_7) + del einsum_366 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + reshape_144, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_144 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_145 = paddle._C_ops.reshape(slice_20, full_int_array_9) + del slice_20 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_20 = paddle._C_ops.index_select(reshape_145, arange_2, 3) + del reshape_145 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_183 = paddle._C_ops.add(reshape_140, parameter_61) + del parameter_61, reshape_140 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_124 = [add_183, parameter_59] + del add_183, parameter_59 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_369, einsum_370, einsum_371 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_124, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_124 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_492, + split_493, + ) = einsum_370 + del einsum_370 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_494, + split_495, + ) = einsum_371 + del einsum_371 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_125 = [cast_5, einsum_369] + del einsum_369 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_372, einsum_373, einsum_374 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_125, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_125 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_496, + split_497, + ) = einsum_373 + del einsum_373 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_498, + split_499, + ) = einsum_374 + del einsum_374 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_184 = paddle._C_ops.add(einsum_363, index_select_20) + del einsum_363, index_select_20 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_185 = paddle._C_ops.add(add_184, einsum_372) + del add_184, einsum_372 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_24 = paddle._C_ops.scale(add_185, full_16, float("0"), True) + del add_185 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_20 = paddle._C_ops.subtract(scale_24, scale_4) + del scale_24 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_20 = paddle._C_ops.softmax(subtract_20, 3) + del subtract_20 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_164, dropout_165 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_20, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_20 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_126 = [dropout_164, reshape_142] + del dropout_164, reshape_142 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_375, einsum_376, einsum_377 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_126, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_126 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_500, + split_501, + ) = einsum_376 + del einsum_376 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_502, + split_503, + ) = einsum_377 + del einsum_377 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_146 = paddle._C_ops.reshape(einsum_375, full_int_array_10) + del einsum_375 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_127 = [reshape_146, parameter_64] + del parameter_64, reshape_146 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_378, einsum_379, einsum_380 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_127, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_127 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_504, + split_505, + ) = einsum_379 + del einsum_379 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_506, + split_507, + ) = einsum_380 + del einsum_380 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_166, dropout_167 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_378, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_378 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_186 = paddle._C_ops.add(dropout_166, layer_norm_117) + del dropout_166, layer_norm_117 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_186, parameter_58, parameter_57, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_186, parameter_57, parameter_58 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_124 = paddle._C_ops.matmul(layer_norm_120, parameter_54, False, False) + del parameter_54 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_187 = paddle._C_ops.add(matmul_124, parameter_53) + del matmul_124, parameter_53 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_20 = paddle._C_ops.relu(add_187) + del add_187 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_168, dropout_169 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_20, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_20 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_125 = paddle._C_ops.matmul(dropout_168, parameter_52, False, False) + del dropout_168, parameter_52 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_188 = paddle._C_ops.add(matmul_125, parameter_51) + del matmul_125, parameter_51 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_170, dropout_171 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_188, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_188 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_189 = paddle._C_ops.add(dropout_170, layer_norm_120) + del dropout_170, layer_norm_120 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_189, parameter_56, parameter_55, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_189, parameter_55, parameter_56 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_123, parameter_50, False, False) + del parameter_50 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(matmul_126, full_int_array_5) + del matmul_126 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_127 = paddle._C_ops.matmul(layer_norm_123, parameter_49, False, False) + del parameter_49 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(matmul_127, full_int_array_5) + del matmul_127 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_128 = paddle._C_ops.matmul(layer_norm_123, parameter_48, False, False) + del parameter_48 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(matmul_128, full_int_array_5) + del matmul_128 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_129 = paddle._C_ops.matmul(dropout_2, parameter_46, False, False) + del parameter_46 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(matmul_129, full_int_array_6) + del matmul_129 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_190 = paddle._C_ops.add(reshape_147, parameter_43) + del parameter_43 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_128 = [add_190, reshape_148] + del add_190, reshape_148 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_381, einsum_382, einsum_383 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_128, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_128 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_508, + split_509, + ) = einsum_382 + del einsum_382 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_510, + split_511, + ) = einsum_383 + del einsum_383 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_191 = paddle._C_ops.add(reshape_147, parameter_45) + del parameter_45 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_129 = [add_191, reshape_150] + del add_191, reshape_150 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_384, einsum_385, einsum_386 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_129, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_129 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_512, + split_513, + ) = einsum_385 + del einsum_385 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_514, + split_515, + ) = einsum_386 + del einsum_386 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_151 = paddle._C_ops.reshape(einsum_384, full_int_array_7) + del einsum_384 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + reshape_151, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_151 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(slice_21, full_int_array_9) + del slice_21 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_21 = paddle._C_ops.index_select(reshape_152, arange_2, 3) + del reshape_152 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_192 = paddle._C_ops.add(reshape_147, parameter_44) + del parameter_44, reshape_147 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_130 = [add_192, parameter_42] + del add_192, parameter_42 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_387, einsum_388, einsum_389 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_130, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_130 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_516, + split_517, + ) = einsum_388 + del einsum_388 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_518, + split_519, + ) = einsum_389 + del einsum_389 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_131 = [cast_5, einsum_387] + del einsum_387 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_390, einsum_391, einsum_392 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_131, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_131 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_520, + split_521, + ) = einsum_391 + del einsum_391 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_522, + split_523, + ) = einsum_392 + del einsum_392 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_193 = paddle._C_ops.add(einsum_381, index_select_21) + del einsum_381, index_select_21 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_194 = paddle._C_ops.add(add_193, einsum_390) + del add_193, einsum_390 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_25 = paddle._C_ops.scale(add_194, full_16, float("0"), True) + del add_194 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_21 = paddle._C_ops.subtract(scale_25, scale_4) + del scale_25 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_21 = paddle._C_ops.softmax(subtract_21, 3) + del subtract_21 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_172, dropout_173 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_21 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_132 = [dropout_172, reshape_149] + del dropout_172, reshape_149 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_393, einsum_394, einsum_395 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_132, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_132 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_524, + split_525, + ) = einsum_394 + del einsum_394 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_526, + split_527, + ) = einsum_395 + del einsum_395 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_153 = paddle._C_ops.reshape(einsum_393, full_int_array_10) + del einsum_393 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_133 = [reshape_153, parameter_47] + del parameter_47, reshape_153 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_396, einsum_397, einsum_398 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_133, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_133 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_528, + split_529, + ) = einsum_397 + del einsum_397 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_530, + split_531, + ) = einsum_398 + del einsum_398 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_174, dropout_175 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_396, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_396 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_195 = paddle._C_ops.add(dropout_174, layer_norm_123) + del dropout_174, layer_norm_123 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_195, parameter_41, parameter_40, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_195, parameter_40, parameter_41 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_130 = paddle._C_ops.matmul(layer_norm_126, parameter_37, False, False) + del parameter_37 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_196 = paddle._C_ops.add(matmul_130, parameter_36) + del matmul_130, parameter_36 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_21 = paddle._C_ops.relu(add_196) + del add_196 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_176, dropout_177 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_21 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_131 = paddle._C_ops.matmul(dropout_176, parameter_35, False, False) + del dropout_176, parameter_35 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_197 = paddle._C_ops.add(matmul_131, parameter_34) + del matmul_131, parameter_34 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_178, dropout_179 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_197, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_197 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_198 = paddle._C_ops.add(dropout_178, layer_norm_126) + del dropout_178, layer_norm_126 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_198, parameter_39, parameter_38, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_198, parameter_38, parameter_39 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_132 = paddle._C_ops.matmul(layer_norm_129, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_154 = paddle._C_ops.reshape(matmul_132, full_int_array_5) + del matmul_132 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_133 = paddle._C_ops.matmul(layer_norm_129, parameter_32, False, False) + del parameter_32 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(matmul_133, full_int_array_5) + del matmul_133 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_129, parameter_31, False, False) + del parameter_31 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(matmul_134, full_int_array_5) + del matmul_134 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_135 = paddle._C_ops.matmul(dropout_2, parameter_29, False, False) + del parameter_29 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_157 = paddle._C_ops.reshape(matmul_135, full_int_array_6) + del matmul_135 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_199 = paddle._C_ops.add(reshape_154, parameter_26) + del parameter_26 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_134 = [add_199, reshape_155] + del add_199, reshape_155 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_399, einsum_400, einsum_401 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_134, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_134 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_532, + split_533, + ) = einsum_400 + del einsum_400 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_534, + split_535, + ) = einsum_401 + del einsum_401 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_200 = paddle._C_ops.add(reshape_154, parameter_28) + del parameter_28 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_135 = [add_200, reshape_157] + del add_200, reshape_157 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_402, einsum_403, einsum_404 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_135, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_135 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_536, + split_537, + ) = einsum_403 + del einsum_403 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_538, + split_539, + ) = einsum_404 + del einsum_404 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(einsum_402, full_int_array_7) + del einsum_402 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + reshape_158, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_158 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(slice_22, full_int_array_9) + del slice_22 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_22 = paddle._C_ops.index_select(reshape_159, arange_2, 3) + del reshape_159 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_201 = paddle._C_ops.add(reshape_154, parameter_27) + del parameter_27, reshape_154 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_136 = [add_201, parameter_25] + del add_201, parameter_25 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_405, einsum_406, einsum_407 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_136, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_136 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_540, + split_541, + ) = einsum_406 + del einsum_406 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_542, + split_543, + ) = einsum_407 + del einsum_407 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_137 = [cast_5, einsum_405] + del einsum_405 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_408, einsum_409, einsum_410 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_137, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_137 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_544, + split_545, + ) = einsum_409 + del einsum_409 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_546, + split_547, + ) = einsum_410 + del einsum_410 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_202 = paddle._C_ops.add(einsum_399, index_select_22) + del einsum_399, index_select_22 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_203 = paddle._C_ops.add(add_202, einsum_408) + del add_202, einsum_408 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_26 = paddle._C_ops.scale(add_203, full_16, float("0"), True) + del add_203 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_22 = paddle._C_ops.subtract(scale_26, scale_4) + del scale_26 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_22 = paddle._C_ops.softmax(subtract_22, 3) + del subtract_22 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_180, dropout_181 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_22, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_22 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_138 = [dropout_180, reshape_156] + del dropout_180, reshape_156 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_411, einsum_412, einsum_413 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_138, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_138 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_548, + split_549, + ) = einsum_412 + del einsum_412 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_550, + split_551, + ) = einsum_413 + del einsum_413 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(einsum_411, full_int_array_10) + del einsum_411 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_139 = [reshape_160, parameter_30] + del parameter_30, reshape_160 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_414, einsum_415, einsum_416 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_139, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_139 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_552, + split_553, + ) = einsum_415 + del einsum_415 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_554, + split_555, + ) = einsum_416 + del einsum_416 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_182, dropout_183 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_414, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_414 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_204 = paddle._C_ops.add(dropout_182, layer_norm_129) + del dropout_182, layer_norm_129 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_204, parameter_24, parameter_23, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_204, parameter_23, parameter_24 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_136 = paddle._C_ops.matmul(layer_norm_132, parameter_20, False, False) + del parameter_20 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_205 = paddle._C_ops.add(matmul_136, parameter_19) + del matmul_136, parameter_19 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_22 = paddle._C_ops.relu(add_205) + del add_205 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_184, dropout_185 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_22, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_22 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_137 = paddle._C_ops.matmul(dropout_184, parameter_18, False, False) + del dropout_184, parameter_18 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_206 = paddle._C_ops.add(matmul_137, parameter_17) + del matmul_137, parameter_17 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_186, dropout_187 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_206, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_206 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_207 = paddle._C_ops.add(dropout_186, layer_norm_132) + del dropout_186, layer_norm_132 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_207, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_207, parameter_21, parameter_22 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_138 = paddle._C_ops.matmul(layer_norm_135, parameter_16, False, False) + del parameter_16 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_161 = paddle._C_ops.reshape(matmul_138, full_int_array_5) + del matmul_138 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_139 = paddle._C_ops.matmul(layer_norm_135, parameter_15, False, False) + del parameter_15 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_162 = paddle._C_ops.reshape(matmul_139, full_int_array_5) + del matmul_139 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x1024xf32, 1024x1024xf32) + matmul_140 = paddle._C_ops.matmul(layer_norm_135, parameter_14, False, False) + del parameter_14 + + # pd_op.reshape: (9x1x16x64xf32) <- (9x1x1024xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(matmul_140, full_int_array_5) + del full_int_array_5, matmul_140 + + # pd_op.matmul: (18x1x1024xf32) <- (18x1x1024xf32, 1024x1024xf32) + matmul_141 = paddle._C_ops.matmul(dropout_2, parameter_12, False, False) + del dropout_2, parameter_12 + + # pd_op.reshape: (18x1x16x64xf32) <- (18x1x1024xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(matmul_141, full_int_array_6) + del full_int_array_6, matmul_141 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_208 = paddle._C_ops.add(reshape_161, parameter_9) + del parameter_9 + + # builtin.combine: ([9x1x16x64xf32, 9x1x16x64xf32]) <- (9x1x16x64xf32, 9x1x16x64xf32) + combine_140 = [add_208, reshape_162] + del add_208, reshape_162 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x1x16x64xf32, 9x1x16x64xf32]) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + einsum_417, einsum_418, einsum_419 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_140, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_140 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_556, + split_557, + ) = einsum_418 + del einsum_418 + + # builtin.split: (9x1x16x64xf32, 9x1x16x64xf32) <- ([9x1x16x64xf32, 9x1x16x64xf32]) + ( + split_558, + split_559, + ) = einsum_419 + del einsum_419 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_209 = paddle._C_ops.add(reshape_161, parameter_11) + del parameter_11 + + # builtin.combine: ([9x1x16x64xf32, 18x1x16x64xf32]) <- (9x1x16x64xf32, 18x1x16x64xf32) + combine_141 = [add_209, reshape_164] + del add_209, reshape_164 + + # pd_op.einsum: (1x16x9x18xf32, [0xf32, 0xf32], [9x1x16x64xf32, 18x1x16x64xf32]) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + einsum_420, einsum_421, einsum_422 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_141, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_141 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_560, + split_561, + ) = einsum_421 + del einsum_421 + + # builtin.split: (9x1x16x64xf32, 18x1x16x64xf32) <- ([9x1x16x64xf32, 18x1x16x64xf32]) + ( + split_562, + split_563, + ) = einsum_422 + del einsum_422 + + # pd_op.reshape: (1x16x18x9xf32) <- (1x16x9x18xf32, 4xi64) + reshape_165 = paddle._C_ops.reshape(einsum_420, full_int_array_7) + del einsum_420, full_int_array_7 + + # pd_op.slice: (1x16x17x9xf32) <- (1x16x18x9xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + reshape_165, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del full_int_array_3, full_int_array_8, reshape_165 + + # pd_op.reshape: (1x16x9x17xf32) <- (1x16x17x9xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(slice_23, full_int_array_9) + del full_int_array_9, slice_23 + + # pd_op.index_select: (1x16x9x9xf32) <- (1x16x9x17xf32, 9xi64) + index_select_23 = paddle._C_ops.index_select(reshape_166, arange_2, 3) + del arange_2, reshape_166 + + # pd_op.add: (9x1x16x64xf32) <- (9x1x16x64xf32, 16x64xf32) + add_210 = paddle._C_ops.add(reshape_161, parameter_10) + del parameter_10, reshape_161 + + # builtin.combine: ([9x1x16x64xf32, 2x16x64xf32]) <- (9x1x16x64xf32, 2x16x64xf32) + combine_142 = [add_210, parameter_8] + del add_210, parameter_8 + + # pd_op.einsum: (9x1x16x2xf32, [0xf32, 0xf32], [9x1x16x64xf32, 2x16x64xf32]) <- ([9x1x16x64xf32, 2x16x64xf32]) + einsum_423, einsum_424, einsum_425 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_142, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_142 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_564, + split_565, + ) = einsum_424 + del einsum_424 + + # builtin.split: (9x1x16x64xf32, 2x16x64xf32) <- ([9x1x16x64xf32, 2x16x64xf32]) + ( + split_566, + split_567, + ) = einsum_425 + del einsum_425 + + # builtin.combine: ([9x9x1x2xf32, 9x1x16x2xf32]) <- (9x9x1x2xf32, 9x1x16x2xf32) + combine_143 = [cast_5, einsum_423] + del cast_5, einsum_423 + + # pd_op.einsum: (1x16x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x16x2xf32]) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + einsum_426, einsum_427, einsum_428 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_143, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_143 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_568, + split_569, + ) = einsum_427 + del einsum_427 + + # builtin.split: (9x9x1x2xf32, 9x1x16x2xf32) <- ([9x9x1x2xf32, 9x1x16x2xf32]) + ( + split_570, + split_571, + ) = einsum_428 + del einsum_428 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_211 = paddle._C_ops.add(einsum_417, index_select_23) + del einsum_417, index_select_23 + + # pd_op.add: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x16x9x9xf32) + add_212 = paddle._C_ops.add(add_211, einsum_426) + del add_211, einsum_426 + + # pd_op.scale: (1x16x9x9xf32) <- (1x16x9x9xf32, 1xf32) + scale_27 = paddle._C_ops.scale(add_212, full_16, float("0"), True) + del add_212, full_16 + + # pd_op.subtract: (1x16x9x9xf32) <- (1x16x9x9xf32, 1x1x9x9xf32) + subtract_23 = paddle._C_ops.subtract(scale_27, scale_4) + del scale_27, scale_4 + + # pd_op.softmax: (1x16x9x9xf32) <- (1x16x9x9xf32) + softmax_23 = paddle._C_ops.softmax(subtract_23, 3) + del subtract_23 + + # pd_op.dropout: (1x16x9x9xf32, 1x16x9x9xui8) <- (1x16x9x9xf32, None, 1xf32) + dropout_188, dropout_189 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_23 + + # builtin.combine: ([1x16x9x9xf32, 9x1x16x64xf32]) <- (1x16x9x9xf32, 9x1x16x64xf32) + combine_144 = [dropout_188, reshape_163] + del dropout_188, reshape_163 + + # pd_op.einsum: (9x1x16x64xf32, [0xf32, 0xf32], [1x16x9x9xf32, 9x1x16x64xf32]) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + einsum_429, einsum_430, einsum_431 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_144, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_144 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_572, + split_573, + ) = einsum_430 + del einsum_430 + + # builtin.split: (1x16x9x9xf32, 9x1x16x64xf32) <- ([1x16x9x9xf32, 9x1x16x64xf32]) + ( + split_574, + split_575, + ) = einsum_431 + del einsum_431 + + # pd_op.reshape: (9x1x1024xf32) <- (9x1x16x64xf32, 3xi64) + reshape_167 = paddle._C_ops.reshape(einsum_429, full_int_array_10) + del einsum_429, full_int_array_10 + + # builtin.combine: ([9x1x1024xf32, 1024x1024xf32]) <- (9x1x1024xf32, 1024x1024xf32) + combine_145 = [reshape_167, parameter_13] + del parameter_13, reshape_167 + + # pd_op.einsum: (9x1x1024xf32, [0xf32, 0xf32], [9x1x1024xf32, 1024x1024xf32]) <- ([9x1x1024xf32, 1024x1024xf32]) + einsum_432, einsum_433, einsum_434 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_145, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_145 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_576, + split_577, + ) = einsum_433 + del einsum_433 + + # builtin.split: (9x1x1024xf32, 1024x1024xf32) <- ([9x1x1024xf32, 1024x1024xf32]) + ( + split_578, + split_579, + ) = einsum_434 + del einsum_434 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_190, dropout_191 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_432, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_432 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_213 = paddle._C_ops.add(dropout_190, layer_norm_135) + del dropout_190, layer_norm_135 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_213, parameter_7, parameter_6, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_213, parameter_6, parameter_7 + + # pd_op.matmul: (9x1x4096xf32) <- (9x1x1024xf32, 1024x4096xf32) + matmul_142 = paddle._C_ops.matmul(layer_norm_138, parameter_3, False, False) + del parameter_3 + + # pd_op.add: (9x1x4096xf32) <- (9x1x4096xf32, 4096xf32) + add_214 = paddle._C_ops.add(matmul_142, parameter_2) + del matmul_142, parameter_2 + + # pd_op.relu: (9x1x4096xf32) <- (9x1x4096xf32) + relu_23 = paddle._C_ops.relu(add_214) + del add_214 + + # pd_op.dropout: (9x1x4096xf32, 9x1x4096xui8) <- (9x1x4096xf32, None, 1xf32) + dropout_192, dropout_193 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_23 + + # pd_op.matmul: (9x1x1024xf32) <- (9x1x4096xf32, 4096x1024xf32) + matmul_143 = paddle._C_ops.matmul(dropout_192, parameter_1, False, False) + del dropout_192, parameter_1 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 1024xf32) + add_215 = paddle._C_ops.add(matmul_143, parameter_0) + del matmul_143, parameter_0 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_194, dropout_195 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_215, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_215 + + # pd_op.add: (9x1x1024xf32) <- (9x1x1024xf32, 9x1x1024xf32) + add_216 = paddle._C_ops.add(dropout_194, layer_norm_138) + del dropout_194, layer_norm_138 + + # pd_op.layer_norm: (9x1x1024xf32, 9x1xf32, 9x1xf32) <- (9x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_216, parameter_5, parameter_4, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_216, parameter_4, parameter_5 + + # pd_op.dropout: (9x1x1024xf32, 9x1x1024xui8) <- (9x1x1024xf32, None, 1xf32) + dropout_196, dropout_197 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_141, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_3, layer_norm_141 + + # pd_op.transpose: (1x9x1024xf32) <- (9x1x1024xf32) + transpose_0 = paddle._C_ops.transpose(dropout_196, [1, 0, 2]) + del dropout_196 + + return transpose_0 diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-large/weight_meta.py b/paddle_samples/PaddleNLP/chinese-xlnet-large/weight_meta.py new file mode 100644 index 000000000..f950aa405 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-large/weight_meta.py @@ -0,0 +1,4076 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101174") + max_val = float("0.106974") + mean = float("1.12949e-05") + std = float("0.0200075") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.103823") + max_val = float("0.101782") + mean = float("7.26515e-07") + std = float("0.0200079") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.080153") + max_val = float("0.0671401") + mean = float("0.000422448") + std = float("0.0202072") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0689389") + max_val = float("0.0645356") + mean = float("-3.42223e-05") + std = float("0.0190464") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0680583") + max_val = float("0.058991") + mean = float("-7.84936e-05") + std = float("0.0205614") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0644402") + max_val = float("0.0624891") + mean = float("0.00137222") + std = float("0.0198877") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.105641") + max_val = float("0.0969925") + mean = float("-2.13787e-05") + std = float("0.0200024") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0982633") + max_val = float("0.101413") + mean = float("2.36475e-05") + std = float("0.0199959") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0963071") + max_val = float("0.093978") + mean = float("-5.18626e-06") + std = float("0.0199941") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0981089") + max_val = float("0.111486") + mean = float("1.5163e-05") + std = float("0.0199782") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0959459") + max_val = float("0.0927912") + mean = float("-2.88949e-05") + std = float("0.0199985") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.11361") + max_val = float("0.0986661") + mean = float("-1.20929e-05") + std = float("0.0199967") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.108304") + max_val = float("0.101341") + mean = float("1.0301e-06") + std = float("0.0199869") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0776118") + max_val = float("0.0769515") + mean = float("-0.000727962") + std = float("0.0201071") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0595223") + max_val = float("0.0820465") + mean = float("-0.000412415") + std = float("0.0201139") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0582567") + max_val = float("0.0791273") + mean = float("0.000193121") + std = float("0.0197251") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0675272") + max_val = float("0.100214") + mean = float("-0.000132156") + std = float("0.0208549") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100283") + max_val = float("0.0990686") + mean = float("-9.22493e-06") + std = float("0.0200041") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0979155") + max_val = float("0.0984891") + mean = float("-2.74728e-05") + std = float("0.0200198") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.115893") + max_val = float("0.0921145") + mean = float("4.66574e-05") + std = float("0.0200015") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0988392") + max_val = float("0.107493") + mean = float("1.13895e-05") + std = float("0.0199926") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0931193") + max_val = float("0.107737") + mean = float("6.50127e-06") + std = float("0.0200053") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.107361") + max_val = float("0.0978937") + mean = float("-6.68323e-06") + std = float("0.0200042") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101607") + max_val = float("0.104253") + mean = float("1.84875e-05") + std = float("0.0199944") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0688502") + max_val = float("0.0717973") + mean = float("0.00071679") + std = float("0.020282") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.065053") + max_val = float("0.0546639") + mean = float("-0.000881583") + std = float("0.0198136") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0616152") + max_val = float("0.0633376") + mean = float("-0.00082395") + std = float("0.0201433") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0703036") + max_val = float("0.071403") + mean = float("0.000445171") + std = float("0.0197137") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0943561") + max_val = float("0.0883682") + mean = float("7.88088e-06") + std = float("0.0200132") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0939037") + max_val = float("0.101689") + mean = float("-4.57994e-05") + std = float("0.020009") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0882293") + max_val = float("0.0969764") + mean = float("2.04783e-05") + std = float("0.0199901") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.090025") + max_val = float("0.0979138") + mean = float("-5.89511e-06") + std = float("0.0199945") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0971927") + max_val = float("0.0930698") + mean = float("3.42978e-05") + std = float("0.0200034") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.102314") + max_val = float("0.0957205") + mean = float("-5.60234e-06") + std = float("0.019992") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0976568") + max_val = float("0.110507") + mean = float("-5.50881e-06") + std = float("0.0200016") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0741549") + max_val = float("0.0757544") + mean = float("0.000369111") + std = float("0.0199624") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0553387") + max_val = float("0.0627761") + mean = float("0.000173923") + std = float("0.0192062") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0625853") + max_val = float("0.0559364") + mean = float("-0.000558363") + std = float("0.0205408") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.08983") + max_val = float("0.0593039") + mean = float("0.000151348") + std = float("0.0206735") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0981591") + max_val = float("0.0960741") + mean = float("1.92988e-05") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0997372") + max_val = float("0.098958") + mean = float("-3.23046e-06") + std = float("0.0199816") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.096217") + max_val = float("0.095661") + mean = float("-9.95077e-06") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0955656") + max_val = float("0.0941511") + mean = float("-4.31246e-06") + std = float("0.0199914") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0992673") + max_val = float("0.105218") + mean = float("2.06525e-05") + std = float("0.0199946") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.111812") + max_val = float("0.107004") + mean = float("5.71117e-06") + std = float("0.0200079") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100086") + max_val = float("0.106293") + mean = float("-1.2655e-05") + std = float("0.019992") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0634019") + max_val = float("0.0621452") + mean = float("-0.00104323") + std = float("0.0198099") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0703846") + max_val = float("0.0622295") + mean = float("0.000349012") + std = float("0.0197498") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0663927") + max_val = float("0.0833278") + mean = float("0.000419491") + std = float("0.020438") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0657833") + max_val = float("0.0647611") + mean = float("0.00073824") + std = float("0.0198901") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0983278") + max_val = float("0.09088") + mean = float("2.25245e-05") + std = float("0.0199907") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0916077") + max_val = float("0.101837") + mean = float("3.14194e-06") + std = float("0.0199902") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.092645") + max_val = float("0.0972162") + mean = float("-2.07655e-05") + std = float("0.01999") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0946761") + max_val = float("0.0993856") + mean = float("7.70147e-06") + std = float("0.0199826") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.119291") + max_val = float("0.0945843") + mean = float("-1.80735e-05") + std = float("0.0199915") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.106226") + max_val = float("0.106861") + mean = float("1.53389e-06") + std = float("0.019991") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.107367") + max_val = float("0.0993204") + mean = float("6.34937e-06") + std = float("0.0199885") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0635183") + max_val = float("0.0880163") + mean = float("-0.00041083") + std = float("0.020506") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0758214") + max_val = float("0.0642966") + mean = float("-0.000908967") + std = float("0.0199706") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0587357") + max_val = float("0.0604111") + mean = float("-0.000315282") + std = float("0.0196432") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0623426") + max_val = float("0.0619337") + mean = float("0.000404857") + std = float("0.0207849") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0939795") + max_val = float("0.101385") + mean = float("2.0498e-05") + std = float("0.0199942") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0954476") + max_val = float("0.101808") + mean = float("8.66002e-07") + std = float("0.0199923") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0988117") + max_val = float("0.0962046") + mean = float("2.66444e-05") + std = float("0.0199712") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.104092") + max_val = float("0.093028") + mean = float("-3.59512e-06") + std = float("0.0200182") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0977985") + max_val = float("0.0991767") + mean = float("5.80583e-05") + std = float("0.0199986") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.100591") + max_val = float("0.102676") + mean = float("1.4664e-05") + std = float("0.0199999") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.10186") + max_val = float("0.103011") + mean = float("7.18337e-06") + std = float("0.0199978") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0722179") + max_val = float("0.0697168") + mean = float("0.000629881") + std = float("0.020497") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0683354") + max_val = float("0.0530142") + mean = float("-0.00041802") + std = float("0.0197935") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0682838") + max_val = float("0.0628097") + mean = float("-0.000535469") + std = float("0.0203411") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.056469") + max_val = float("0.0633971") + mean = float("0.00113328") + std = float("0.019812") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103305") + max_val = float("0.107857") + mean = float("5.03488e-06") + std = float("0.0199934") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.105087") + max_val = float("0.0940546") + mean = float("-1.19034e-05") + std = float("0.0200155") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0942608") + max_val = float("0.0985209") + mean = float("-1.18328e-05") + std = float("0.0200002") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101857") + max_val = float("0.103268") + mean = float("-2.53371e-05") + std = float("0.020014") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100453") + max_val = float("0.101783") + mean = float("8.46206e-06") + std = float("0.0200377") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.110061") + max_val = float("0.0977406") + mean = float("-1.44333e-05") + std = float("0.0200069") + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.10051") + max_val = float("0.112146") + mean = float("-8.05737e-06") + std = float("0.0199925") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0686442") + max_val = float("0.0714007") + mean = float("-0.000159975") + std = float("0.0201678") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0595842") + max_val = float("0.0719294") + mean = float("-3.13658e-05") + std = float("0.0204201") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0647841") + max_val = float("0.0633613") + mean = float("-0.00077124") + std = float("0.0195923") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0657016") + max_val = float("0.0680139") + mean = float("-0.00124655") + std = float("0.0194623") + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0941171") + max_val = float("0.0950286") + mean = float("6.25059e-06") + std = float("0.020012") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0945435") + max_val = float("0.100857") + mean = float("3.24381e-05") + std = float("0.0200183") + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.108057") + max_val = float("0.0941034") + mean = float("-1.74459e-05") + std = float("0.019992") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0978291") + max_val = float("0.0925467") + mean = float("1.02683e-06") + std = float("0.0200146") + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.109219") + max_val = float("0.0987054") + mean = float("1.04907e-05") + std = float("0.0200115") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.103292") + max_val = float("0.10374") + mean = float("-1.9846e-06") + std = float("0.0199935") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100205") + max_val = float("0.0964732") + mean = float("4.08738e-06") + std = float("0.0200045") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.066533") + max_val = float("0.0640852") + mean = float("0.000620742") + std = float("0.0199716") + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0606203") + max_val = float("0.0627061") + mean = float("4.99137e-06") + std = float("0.0194108") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0717645") + max_val = float("0.0675073") + mean = float("0.000530434") + std = float("0.0199398") + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0561925") + max_val = float("0.0620645") + mean = float("0.000263358") + std = float("0.0195281") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0920362") + max_val = float("0.0918777") + mean = float("-1.84304e-05") + std = float("0.0199942") + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0990218") + max_val = float("0.096936") + mean = float("-2.33644e-05") + std = float("0.0199892") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100763") + max_val = float("0.0914228") + mean = float("-4.19569e-05") + std = float("0.0199779") + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0997828") + max_val = float("0.0945503") + mean = float("-1.85671e-05") + std = float("0.0200027") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0970994") + max_val = float("0.103876") + mean = float("9.65016e-06") + std = float("0.0199919") + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.108847") + max_val = float("0.104224") + mean = float("1.70981e-07") + std = float("0.0199995") + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0984719") + max_val = float("0.100283") + mean = float("-2.14459e-06") + std = float("0.0200008") + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0652733") + max_val = float("0.0623973") + mean = float("-0.000111954") + std = float("0.0196823") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0612179") + max_val = float("0.0602291") + mean = float("0.000256394") + std = float("0.0194512") + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0608343") + max_val = float("0.0584399") + mean = float("0.000367647") + std = float("0.019854") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.063469") + max_val = float("0.0856444") + mean = float("-0.000209721") + std = float("0.0194563") + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0932418") + max_val = float("0.0971317") + mean = float("8.3827e-06") + std = float("0.019981") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0943668") + max_val = float("0.0910483") + mean = float("7.40488e-06") + std = float("0.0200086") + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0931213") + max_val = float("0.102227") + mean = float("4.29648e-05") + std = float("0.0199886") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0963163") + max_val = float("0.101721") + mean = float("1.81921e-05") + std = float("0.0199958") + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.105238") + max_val = float("0.102362") + mean = float("-2.42609e-05") + std = float("0.0200077") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0950606") + max_val = float("0.104676") + mean = float("1.38038e-05") + std = float("0.0200084") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101412") + max_val = float("0.10013") + mean = float("1.1315e-05") + std = float("0.0199956") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0655597") + max_val = float("0.0763105") + mean = float("-0.000411208") + std = float("0.0201434") + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0668925") + max_val = float("0.0576623") + mean = float("0.00108288") + std = float("0.0203384") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0646394") + max_val = float("0.0680152") + mean = float("-0.000135809") + std = float("0.0195784") + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.060051") + max_val = float("0.0630734") + mean = float("-0.000379699") + std = float("0.0203422") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0969839") + max_val = float("0.101515") + mean = float("-2.37039e-05") + std = float("0.0200014") + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0968036") + max_val = float("0.0968174") + mean = float("1.96444e-05") + std = float("0.0200059") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0865728") + max_val = float("0.0994847") + mean = float("-3.51517e-06") + std = float("0.0200172") + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0948586") + max_val = float("0.0898406") + mean = float("9.69405e-06") + std = float("0.0199866") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0948052") + max_val = float("0.0915345") + mean = float("-2.06129e-05") + std = float("0.0199876") + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.105008") + max_val = float("0.113057") + mean = float("6.88203e-06") + std = float("0.0200095") + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100402") + max_val = float("0.104401") + mean = float("1.41856e-05") + std = float("0.0199831") + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0805676") + max_val = float("0.0700786") + mean = float("3.40461e-05") + std = float("0.0200066") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0611997") + max_val = float("0.0788406") + mean = float("0.000472469") + std = float("0.0202516") + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0586935") + max_val = float("0.0629197") + mean = float("-0.000473277") + std = float("0.0198758") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0574957") + max_val = float("0.0719211") + mean = float("-0.00096253") + std = float("0.0200222") + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0975594") + max_val = float("0.0934616") + mean = float("-7.13832e-06") + std = float("0.0200197") + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0962643") + max_val = float("0.0999671") + mean = float("1.92496e-06") + std = float("0.019963") + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.105258") + max_val = float("0.0996388") + mean = float("1.98332e-05") + std = float("0.0200079") + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0928025") + max_val = float("0.0988722") + mean = float("-1.5117e-05") + std = float("0.0200138") + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.095285") + max_val = float("0.0982607") + mean = float("7.84191e-06") + std = float("0.0199938") + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.107757") + max_val = float("0.0972812") + mean = float("6.76515e-06") + std = float("0.0200009") + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0997285") + max_val = float("0.100684") + mean = float("8.63008e-06") + std = float("0.0199863") + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0701117") + max_val = float("0.0675819") + mean = float("-0.00082489") + std = float("0.0199821") + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0623671") + max_val = float("0.0615726") + mean = float("-0.000293522") + std = float("0.0202768") + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0646346") + max_val = float("0.0690476") + mean = float("0.000435714") + std = float("0.0194082") + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0644714") + max_val = float("0.055561") + mean = float("0.00138591") + std = float("0.0195516") + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0918271") + max_val = float("0.0972187") + mean = float("1.31732e-05") + std = float("0.0199885") + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0997392") + max_val = float("0.100097") + mean = float("3.5355e-06") + std = float("0.0199998") + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0972613") + max_val = float("0.0896363") + mean = float("8.42154e-06") + std = float("0.0199989") + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0981378") + max_val = float("0.0933598") + mean = float("1.3638e-05") + std = float("0.0200094") + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.106812") + max_val = float("0.101117") + mean = float("6.56158e-07") + std = float("0.0199876") + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.100439") + max_val = float("0.102285") + mean = float("-9.53002e-06") + std = float("0.0199964") + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0995681") + max_val = float("0.0990056") + mean = float("8.84426e-06") + std = float("0.0199936") + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0806161") + max_val = float("0.0728875") + mean = float("0.000353456") + std = float("0.0196011") + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0537205") + max_val = float("0.0627947") + mean = float("0.00138492") + std = float("0.0194261") + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0614613") + max_val = float("0.0610024") + mean = float("-0.00019822") + std = float("0.0195517") + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0737003") + max_val = float("0.0481376") + mean = float("-0.000334779") + std = float("0.0194257") + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.092247") + max_val = float("0.106602") + mean = float("2.78315e-05") + std = float("0.0200159") + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0976784") + max_val = float("0.0968902") + mean = float("-1.74879e-05") + std = float("0.0199951") + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.097143") + max_val = float("0.0984268") + mean = float("-1.14251e-05") + std = float("0.0199745") + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0973396") + max_val = float("0.0968062") + mean = float("8.64476e-06") + std = float("0.0199767") + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0898708") + max_val = float("0.0938607") + mean = float("1.28792e-05") + std = float("0.0200167") + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.103183") + max_val = float("0.10181") + mean = float("1.76587e-05") + std = float("0.0199945") + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101759") + max_val = float("0.108292") + mean = float("3.31314e-06") + std = float("0.0200034") + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0649272") + max_val = float("0.0724965") + mean = float("1.94794e-05") + std = float("0.0201683") + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0579609") + max_val = float("0.063785") + mean = float("-0.000174216") + std = float("0.0196671") + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0673813") + max_val = float("0.0673014") + mean = float("0.000625371") + std = float("0.0199423") + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0588756") + max_val = float("0.0810359") + mean = float("0.000141649") + std = float("0.020254") + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.094093") + max_val = float("0.0971453") + mean = float("2.29591e-05") + std = float("0.0200024") + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0951525") + max_val = float("0.0955532") + mean = float("-3.29141e-05") + std = float("0.0199776") + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.091507") + max_val = float("0.105083") + mean = float("1.98842e-05") + std = float("0.0199812") + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.107993") + max_val = float("0.102463") + mean = float("-1.49592e-05") + std = float("0.0199948") + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0939214") + max_val = float("0.0950489") + mean = float("-2.89977e-05") + std = float("0.0199896") + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0974402") + max_val = float("0.109486") + mean = float("-1.05386e-05") + std = float("0.0200016") + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.098575") + max_val = float("0.0998085") + mean = float("-1.17447e-05") + std = float("0.0200097") + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.069583") + max_val = float("0.090873") + mean = float("8.99848e-05") + std = float("0.0197018") + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0557726") + max_val = float("0.0740368") + mean = float("-0.000676958") + std = float("0.0203581") + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0786428") + max_val = float("0.0575711") + mean = float("0.00127308") + std = float("0.0200049") + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0625172") + max_val = float("0.064942") + mean = float("0.000881327") + std = float("0.0211175") + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103271") + max_val = float("0.094095") + mean = float("4.64615e-08") + std = float("0.0199997") + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0935565") + max_val = float("0.09608") + mean = float("1.38109e-05") + std = float("0.0200004") + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0937777") + max_val = float("0.0868086") + mean = float("3.14887e-05") + std = float("0.0200033") + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0982899") + max_val = float("0.105437") + mean = float("-8.34959e-06") + std = float("0.0199841") + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.109972") + max_val = float("0.104686") + mean = float("2.04251e-05") + std = float("0.0200012") + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101295") + max_val = float("0.104349") + mean = float("-6.44501e-06") + std = float("0.0200025") + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.10127") + max_val = float("0.0981984") + mean = float("4.20567e-06") + std = float("0.0199984") + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0679478") + max_val = float("0.0749753") + mean = float("-6.18988e-05") + std = float("0.0200871") + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0752752") + max_val = float("0.0699409") + mean = float("8.90834e-05") + std = float("0.0204845") + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0564257") + max_val = float("0.0582638") + mean = float("-0.000267411") + std = float("0.0197657") + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.070838") + max_val = float("0.0572247") + mean = float("-0.000798589") + std = float("0.019529") + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0957708") + max_val = float("0.0956196") + mean = float("3.22566e-05") + std = float("0.0200046") + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103727") + max_val = float("0.108267") + mean = float("-2.74841e-05") + std = float("0.0199752") + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0984173") + max_val = float("0.0913852") + mean = float("-1.74837e-05") + std = float("0.0199919") + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0978536") + max_val = float("0.104957") + mean = float("-5.7202e-06") + std = float("0.0199945") + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103587") + max_val = float("0.0976505") + mean = float("-2.64606e-05") + std = float("0.0199942") + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.103851") + max_val = float("0.101262") + mean = float("9.33794e-06") + std = float("0.0199928") + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100662") + max_val = float("0.102835") + mean = float("-2.55434e-06") + std = float("0.0200103") + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0626017") + max_val = float("0.0656066") + mean = float("-0.000111057") + std = float("0.0202468") + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0619167") + max_val = float("0.0700004") + mean = float("0.00032209") + std = float("0.0201107") + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0703513") + max_val = float("0.0520745") + mean = float("0.000125638") + std = float("0.0192279") + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0523908") + max_val = float("0.0653329") + mean = float("-0.000826464") + std = float("0.020002") + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.10032") + max_val = float("0.107837") + mean = float("-7.52083e-06") + std = float("0.0199915") + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0963977") + max_val = float("0.101089") + mean = float("3.80581e-05") + std = float("0.0199917") + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0909416") + max_val = float("0.0948842") + mean = float("-3.12662e-06") + std = float("0.0200055") + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101874") + max_val = float("0.0946097") + mean = float("5.74289e-07") + std = float("0.0199916") + data = None + + +class Program_weight_tensor_parameter_305: + name = "parameter_305" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.107225") + max_val = float("0.0995108") + mean = float("-4.92357e-06") + std = float("0.0200176") + data = None + + +class Program_weight_tensor_parameter_306: + name = "parameter_306" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_307: + name = "parameter_307" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.104269") + max_val = float("0.104589") + mean = float("-1.60604e-06") + std = float("0.0200035") + data = None + + +class Program_weight_tensor_parameter_308: + name = "parameter_308" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_309: + name = "parameter_309" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.102395") + max_val = float("0.0991339") + mean = float("1.0274e-05") + std = float("0.019996") + data = None + + +class Program_weight_tensor_parameter_310: + name = "parameter_310" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_311: + name = "parameter_311" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_312: + name = "parameter_312" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_313: + name = "parameter_313" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_314: + name = "parameter_314" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0841563") + max_val = float("0.0637723") + mean = float("-0.000201798") + std = float("0.0196366") + data = None + + +class Program_weight_tensor_parameter_315: + name = "parameter_315" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0652055") + max_val = float("0.0702022") + mean = float("9.55746e-05") + std = float("0.0203474") + data = None + + +class Program_weight_tensor_parameter_316: + name = "parameter_316" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0613293") + max_val = float("0.0549883") + mean = float("-0.000485551") + std = float("0.0195201") + data = None + + +class Program_weight_tensor_parameter_317: + name = "parameter_317" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0746907") + max_val = float("0.0600717") + mean = float("-0.000675786") + std = float("0.0194105") + data = None + + +class Program_weight_tensor_parameter_318: + name = "parameter_318" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.107453") + max_val = float("0.0994477") + mean = float("1.77619e-05") + std = float("0.0199824") + data = None + + +class Program_weight_tensor_parameter_319: + name = "parameter_319" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0966336") + max_val = float("0.0984281") + mean = float("2.17781e-05") + std = float("0.0200194") + data = None + + +class Program_weight_tensor_parameter_320: + name = "parameter_320" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0942078") + max_val = float("0.106695") + mean = float("1.58374e-05") + std = float("0.0199883") + data = None + + +class Program_weight_tensor_parameter_321: + name = "parameter_321" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0992307") + max_val = float("0.0907452") + mean = float("1.90126e-05") + std = float("0.0199944") + data = None + + +class Program_weight_tensor_parameter_322: + name = "parameter_322" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0919617") + max_val = float("0.0934963") + mean = float("-2.61964e-05") + std = float("0.0199878") + data = None + + +class Program_weight_tensor_parameter_323: + name = "parameter_323" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_324: + name = "parameter_324" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.100454") + max_val = float("0.100013") + mean = float("2.46501e-05") + std = float("0.0200119") + data = None + + +class Program_weight_tensor_parameter_325: + name = "parameter_325" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_326: + name = "parameter_326" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.104897") + max_val = float("0.0974905") + mean = float("1.47232e-05") + std = float("0.0199932") + data = None + + +class Program_weight_tensor_parameter_327: + name = "parameter_327" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_328: + name = "parameter_328" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_329: + name = "parameter_329" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_330: + name = "parameter_330" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_331: + name = "parameter_331" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0641577") + max_val = float("0.0666822") + mean = float("0.000249277") + std = float("0.0200067") + data = None + + +class Program_weight_tensor_parameter_332: + name = "parameter_332" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0581891") + max_val = float("0.0640808") + mean = float("0.0010717") + std = float("0.0200259") + data = None + + +class Program_weight_tensor_parameter_333: + name = "parameter_333" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0621781") + max_val = float("0.0541678") + mean = float("-0.00076242") + std = float("0.0205017") + data = None + + +class Program_weight_tensor_parameter_334: + name = "parameter_334" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0653996") + max_val = float("0.0684167") + mean = float("0.000209998") + std = float("0.0206692") + data = None + + +class Program_weight_tensor_parameter_335: + name = "parameter_335" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.093042") + max_val = float("0.0933048") + mean = float("-3.51466e-05") + std = float("0.0200255") + data = None + + +class Program_weight_tensor_parameter_336: + name = "parameter_336" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0970212") + max_val = float("0.0938276") + mean = float("-1.91991e-06") + std = float("0.0200234") + data = None + + +class Program_weight_tensor_parameter_337: + name = "parameter_337" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0990197") + max_val = float("0.0960994") + mean = float("9.8341e-06") + std = float("0.0200041") + data = None + + +class Program_weight_tensor_parameter_338: + name = "parameter_338" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0958936") + max_val = float("0.101472") + mean = float("8.20897e-06") + std = float("0.02") + data = None + + +class Program_weight_tensor_parameter_339: + name = "parameter_339" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0994704") + max_val = float("0.0964828") + mean = float("1.65094e-05") + std = float("0.0199897") + data = None + + +class Program_weight_tensor_parameter_340: + name = "parameter_340" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_341: + name = "parameter_341" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0985067") + max_val = float("0.100013") + mean = float("-1.95348e-05") + std = float("0.0200054") + data = None + + +class Program_weight_tensor_parameter_342: + name = "parameter_342" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_343: + name = "parameter_343" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101093") + max_val = float("0.0976068") + mean = float("1.17624e-05") + std = float("0.0200044") + data = None + + +class Program_weight_tensor_parameter_344: + name = "parameter_344" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_345: + name = "parameter_345" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_346: + name = "parameter_346" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_347: + name = "parameter_347" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_348: + name = "parameter_348" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0643575") + max_val = float("0.0683558") + mean = float("0.000492696") + std = float("0.0192451") + data = None + + +class Program_weight_tensor_parameter_349: + name = "parameter_349" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0688406") + max_val = float("0.0745166") + mean = float("-0.000331381") + std = float("0.0204476") + data = None + + +class Program_weight_tensor_parameter_350: + name = "parameter_350" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0738689") + max_val = float("0.0695295") + mean = float("0.000565032") + std = float("0.0202335") + data = None + + +class Program_weight_tensor_parameter_351: + name = "parameter_351" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.056985") + max_val = float("0.0685464") + mean = float("3.19221e-05") + std = float("0.019697") + data = None + + +class Program_weight_tensor_parameter_352: + name = "parameter_352" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100409") + max_val = float("0.107729") + mean = float("5.13221e-07") + std = float("0.0199895") + data = None + + +class Program_weight_tensor_parameter_353: + name = "parameter_353" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.097502") + max_val = float("0.095367") + mean = float("-2.76522e-05") + std = float("0.0199902") + data = None + + +class Program_weight_tensor_parameter_354: + name = "parameter_354" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.098712") + max_val = float("0.10371") + mean = float("6.20669e-06") + std = float("0.0199942") + data = None + + +class Program_weight_tensor_parameter_355: + name = "parameter_355" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0926043") + max_val = float("0.111207") + mean = float("-3.25062e-06") + std = float("0.0200042") + data = None + + +class Program_weight_tensor_parameter_356: + name = "parameter_356" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0980042") + max_val = float("0.0994727") + mean = float("3.53943e-05") + std = float("0.0199748") + data = None + + +class Program_weight_tensor_parameter_357: + name = "parameter_357" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_358: + name = "parameter_358" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101384") + max_val = float("0.109823") + mean = float("9.79956e-06") + std = float("0.0199974") + data = None + + +class Program_weight_tensor_parameter_359: + name = "parameter_359" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_360: + name = "parameter_360" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100813") + max_val = float("0.104755") + mean = float("-1.55498e-05") + std = float("0.0200078") + data = None + + +class Program_weight_tensor_parameter_361: + name = "parameter_361" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_362: + name = "parameter_362" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_363: + name = "parameter_363" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_364: + name = "parameter_364" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_365: + name = "parameter_365" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0636676") + max_val = float("0.0673576") + mean = float("-0.00101459") + std = float("0.0198469") + data = None + + +class Program_weight_tensor_parameter_366: + name = "parameter_366" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0685063") + max_val = float("0.0597098") + mean = float("-0.000893922") + std = float("0.0194709") + data = None + + +class Program_weight_tensor_parameter_367: + name = "parameter_367" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.06185") + max_val = float("0.0633766") + mean = float("-0.00025569") + std = float("0.0199222") + data = None + + +class Program_weight_tensor_parameter_368: + name = "parameter_368" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.053385") + max_val = float("0.0562548") + mean = float("4.75082e-06") + std = float("0.0200367") + data = None + + +class Program_weight_tensor_parameter_369: + name = "parameter_369" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.1079") + max_val = float("0.096237") + mean = float("-1.25352e-05") + std = float("0.019986") + data = None + + +class Program_weight_tensor_parameter_370: + name = "parameter_370" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.102431") + max_val = float("0.0992015") + mean = float("-1.14069e-05") + std = float("0.0200011") + data = None + + +class Program_weight_tensor_parameter_371: + name = "parameter_371" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0940297") + max_val = float("0.102051") + mean = float("1.7207e-05") + std = float("0.0199849") + data = None + + +class Program_weight_tensor_parameter_372: + name = "parameter_372" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0985293") + max_val = float("0.0991618") + mean = float("-1.43206e-05") + std = float("0.0200159") + data = None + + +class Program_weight_tensor_parameter_373: + name = "parameter_373" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.105187") + max_val = float("0.105412") + mean = float("3.0328e-05") + std = float("0.0200177") + data = None + + +class Program_weight_tensor_parameter_374: + name = "parameter_374" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_375: + name = "parameter_375" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0987471") + max_val = float("0.105192") + mean = float("-9.56708e-06") + std = float("0.0200079") + data = None + + +class Program_weight_tensor_parameter_376: + name = "parameter_376" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_377: + name = "parameter_377" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0997025") + max_val = float("0.102423") + mean = float("4.15081e-06") + std = float("0.0199907") + data = None + + +class Program_weight_tensor_parameter_378: + name = "parameter_378" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_379: + name = "parameter_379" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_380: + name = "parameter_380" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_381: + name = "parameter_381" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_382: + name = "parameter_382" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0670102") + max_val = float("0.0670993") + mean = float("0.000295633") + std = float("0.0200305") + data = None + + +class Program_weight_tensor_parameter_383: + name = "parameter_383" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0734343") + max_val = float("0.0606798") + mean = float("-0.00036096") + std = float("0.0196409") + data = None + + +class Program_weight_tensor_parameter_384: + name = "parameter_384" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0699884") + max_val = float("0.0709267") + mean = float("-0.000168359") + std = float("0.0203052") + data = None + + +class Program_weight_tensor_parameter_385: + name = "parameter_385" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.058247") + max_val = float("0.0571072") + mean = float("-0.00104375") + std = float("0.0190464") + data = None + + +class Program_weight_tensor_parameter_386: + name = "parameter_386" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103492") + max_val = float("0.0961583") + mean = float("-3.09677e-05") + std = float("0.0199968") + data = None + + +class Program_weight_tensor_parameter_387: + name = "parameter_387" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0982702") + max_val = float("0.0894224") + mean = float("-2.02479e-05") + std = float("0.0199937") + data = None + + +class Program_weight_tensor_parameter_388: + name = "parameter_388" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.097582") + max_val = float("0.0910485") + mean = float("1.53476e-05") + std = float("0.0199956") + data = None + + +class Program_weight_tensor_parameter_389: + name = "parameter_389" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0965115") + max_val = float("0.0953453") + mean = float("1.63286e-05") + std = float("0.0200075") + data = None + + +class Program_weight_tensor_parameter_390: + name = "parameter_390" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0955548") + max_val = float("0.104097") + mean = float("5.95724e-06") + std = float("0.0200001") + data = None + + +class Program_weight_tensor_parameter_391: + name = "parameter_391" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_392: + name = "parameter_392" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.106777") + max_val = float("0.103619") + mean = float("-1.28013e-05") + std = float("0.0199958") + data = None + + +class Program_weight_tensor_parameter_393: + name = "parameter_393" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_394: + name = "parameter_394" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.10106") + max_val = float("0.100392") + mean = float("2.31494e-06") + std = float("0.0199927") + data = None + + +class Program_weight_tensor_parameter_395: + name = "parameter_395" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_396: + name = "parameter_396" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_397: + name = "parameter_397" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_398: + name = "parameter_398" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_399: + name = "parameter_399" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0549828") + max_val = float("0.0664872") + mean = float("0.000389889") + std = float("0.0199559") + data = None + + +class Program_weight_tensor_parameter_400: + name = "parameter_400" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0626439") + max_val = float("0.0606803") + mean = float("0.000118815") + std = float("0.0203933") + data = None + + +class Program_weight_tensor_parameter_401: + name = "parameter_401" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0707977") + max_val = float("0.067955") + mean = float("0.000513946") + std = float("0.0196487") + data = None + + +class Program_weight_tensor_parameter_402: + name = "parameter_402" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0592983") + max_val = float("0.0920773") + mean = float("0.000159731") + std = float("0.0207634") + data = None + + +class Program_weight_tensor_parameter_403: + name = "parameter_403" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0923136") + max_val = float("0.0980971") + mean = float("-2.65779e-06") + std = float("0.0199978") + data = None + + +class Program_weight_tensor_parameter_404: + name = "parameter_404" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0896717") + max_val = float("0.0923605") + mean = float("-1.91466e-05") + std = float("0.0200097") + data = None + + +class Program_weight_tensor_parameter_405: + name = "parameter_405" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0999169") + max_val = float("0.0928902") + mean = float("-2.14941e-05") + std = float("0.0200104") + data = None + + +class Program_weight_tensor_parameter_406: + name = "parameter_406" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0945058") + max_val = float("0.0918114") + mean = float("-1.16767e-05") + std = float("0.0200079") + data = None + + +class Program_weight_tensor_parameter_407: + name = "parameter_407" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101291") + max_val = float("0.0917568") + mean = float("4.35013e-05") + std = float("0.0199835") + data = None + + +class Program_weight_tensor_parameter_408: + name = "parameter_408" + shape = [32000, 1024] + dtype = "float32" + min_val = float("-0.104754") + max_val = float("0.116291") + mean = float("-1.36643e-06") + std = float("0.0199993") + data = None + + +class Program_weight_tensor_parameter_409: + name = "parameter_409" + shape = [1, 1, 1024] + dtype = "float32" + min_val = float("-0.053712") + max_val = float("0.0665643") + mean = float("0.000561582") + std = float("0.0194981") + data = None diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-mid/graph_net.json b/paddle_samples/PaddleNLP/chinese-xlnet-mid/graph_net.json new file mode 100644 index 000000000..637c415b1 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-mid/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "chinese-xlnet-mid", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-mid/input_meta.py b/paddle_samples/PaddleNLP/chinese-xlnet-mid/input_meta.py new file mode 100644 index 000000000..9ea1655e0 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-mid/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 9] + dtype = "int64" + data = [19, 11684, 121, 15954, 2090, 21957, 1039, 4, 3] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 9] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 2] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 9] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1] diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-mid/model.py b/paddle_samples/PaddleNLP/chinese-xlnet-mid/model.py new file mode 100644 index 000000000..1f9e0cdb8 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-mid/model.py @@ -0,0 +1,8389 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + parameter_305, + parameter_306, + parameter_307, + parameter_308, + parameter_309, + parameter_310, + parameter_311, + parameter_312, + parameter_313, + parameter_314, + parameter_315, + parameter_316, + parameter_317, + parameter_318, + parameter_319, + parameter_320, + parameter_321, + parameter_322, + parameter_323, + parameter_324, + parameter_325, + parameter_326, + parameter_327, + parameter_328, + parameter_329, + parameter_330, + parameter_331, + parameter_332, + parameter_333, + parameter_334, + parameter_335, + parameter_336, + parameter_337, + parameter_338, + parameter_339, + parameter_340, + parameter_341, + parameter_342, + parameter_343, + parameter_344, + parameter_345, + parameter_346, + parameter_347, + parameter_348, + parameter_349, + parameter_350, + parameter_351, + parameter_352, + parameter_353, + parameter_354, + parameter_355, + parameter_356, + parameter_357, + parameter_358, + parameter_359, + parameter_360, + parameter_361, + parameter_362, + parameter_363, + parameter_364, + parameter_365, + parameter_366, + parameter_367, + parameter_368, + parameter_369, + parameter_370, + parameter_371, + parameter_372, + parameter_373, + parameter_374, + parameter_375, + parameter_376, + parameter_377, + parameter_378, + parameter_379, + parameter_380, + parameter_381, + parameter_382, + parameter_383, + parameter_384, + parameter_385, + parameter_386, + parameter_387, + parameter_388, + parameter_389, + parameter_390, + parameter_391, + parameter_392, + parameter_393, + parameter_394, + parameter_395, + parameter_396, + parameter_397, + parameter_398, + parameter_399, + parameter_400, + parameter_401, + parameter_402, + parameter_403, + parameter_404, + parameter_405, + parameter_406, + parameter_407, + parameter_408, + parameter_409, + data_0, + data_1, + data_2, + ): + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_1 = paddle._C_ops.transpose(data_0, [1, 0]) + del data_0 + + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_2 = paddle._C_ops.transpose(data_1, [1, 0]) + del data_1 + + # pd_op.transpose: (9x1xi64) <- (1x9xi64) + transpose_3 = paddle._C_ops.transpose(data_2, [1, 0]) + del data_2 + + # pd_op.cast: (9x1xf32) <- (9x1xi64) + cast_0 = paddle._C_ops.cast(transpose_3, paddle.float32) + del transpose_3 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (9x1xf32) <- (9x1xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.unsqueeze: (1x9x1xf32) <- (9x1xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.unsqueeze: (1x9x1x1xf32) <- (1x9x1xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.greater_than: (1x9x1x1xb) <- (1x9x1x1xf32, xf32) + greater_than_0 = paddle._C_ops.greater_than(unsqueeze_1, full_1) + del unsqueeze_1 + + # pd_op.cast: (1x9x1x1xf32) <- (1x9x1x1xb) + cast_1 = paddle._C_ops.cast(greater_than_0, paddle.float32) + del greater_than_0 + + # pd_op.full: (9xf32) <- () + full_2 = paddle._C_ops.full( + [9], float("1"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.diag: (9x9xf32) <- (9xf32) + diag_0 = paddle._C_ops.diag(full_2, 0, float("0")) + del full_2 + + # pd_op.scale: (9x9xf32) <- (9x9xf32, 1xf32) + scale_1 = paddle._C_ops.scale(diag_0, full_0, float("0"), True) + del diag_0, full_0 + + # pd_op.cast: (9x9xf32) <- (9x9xf32) + cast_2 = paddle._C_ops.cast(scale_1, paddle.float32) + del scale_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [2, 3] + + # pd_op.unsqueeze: (9x9x1x1xf32) <- (9x9xf32, 2xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(cast_2, full_int_array_2) + del cast_2, full_int_array_2 + + # pd_op.add: (9x9x1x1xf32) <- (1x9x1x1xf32, 9x9x1x1xf32) + add_0 = paddle._C_ops.add(cast_1, unsqueeze_2) + del cast_1, unsqueeze_2 + + # pd_op.greater_than: (9x9x1x1xb) <- (9x9x1x1xf32, xf32) + greater_than_1 = paddle._C_ops.greater_than(add_0, full_1) + del add_0, full_1 + + # pd_op.cast: (9x9x1x1xf32) <- (9x9x1x1xb) + cast_3 = paddle._C_ops.cast(greater_than_1, paddle.float32) + del greater_than_1 + + # pd_op.embedding: (9x1x768xf32) <- (9x1xi64, 32000x768xf32) + embedding_0 = paddle._C_ops.embedding(transpose_1, parameter_408, -1, False) + del parameter_408, transpose_1 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [1] + + # pd_op.unsqueeze: (9x1x1xi64) <- (9x1xi64, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_3) + + # pd_op.unsqueeze: (1x9x1xi64) <- (9x1xi64, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_0) + del full_int_array_0, transpose_2 + + # pd_op.not_equal: (9x9x1xb) <- (9x1x1xi64, 1x9x1xi64) + not_equal_0 = paddle._C_ops.not_equal(unsqueeze_3, unsqueeze_4) + del unsqueeze_3, unsqueeze_4 + + # pd_op.cast: (9x9x1xi64) <- (9x9x1xb) + cast_4 = paddle._C_ops.cast(not_equal_0, paddle.int64) + del not_equal_0 + + # pd_op.full: (1xi32) <- () + full_4 = paddle._C_ops.full( + [1], float("2"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.one_hot: (9x9x1x2xf32) <- (9x9x1xi64, 1xi32) + one_hot_0 = paddle._C_ops.one_hot( + cast_4 % paddle.cast(full_4, cast_4.dtype), full_4 + ) + del cast_4, full_4 + + # pd_op.cast: (9x9x1x2xf32) <- (9x9x1x2xf32) + cast_5 = paddle._C_ops.cast(one_hot_0, paddle.float32) + del one_hot_0 + + # pd_op.full: (1xf64) <- () + full_5 = paddle._C_ops.full( + [1], float("0"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_6 = paddle._C_ops.full( + [1], float("768"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_7 = paddle._C_ops.full( + [1], float("2"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (384xf32) <- (1xf64, 1xf64, 1xf64) + arange_0 = paddle.arange(full_5, full_6, full_7, dtype="float32") + del full_6, full_7 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.00130208"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (384xf32) <- (384xf32, 1xf32) + scale_2 = paddle._C_ops.scale(arange_0, full_8, float("0"), True) + del arange_0, full_8 + + # pd_op.full: (384xf32) <- () + full_9 = paddle._C_ops.full( + [384], + float("10000"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.elementwise_pow: (384xf32) <- (384xf32, 384xf32) + elementwise_pow_0 = paddle._C_ops.elementwise_pow(full_9, scale_2) + del full_9, scale_2 + + # pd_op.full: (384xf32) <- () + full_10 = paddle._C_ops.full( + [384], + float("1"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.divide: (384xf32) <- (384xf32, 384xf32) + divide_0 = paddle._C_ops.divide(full_10, elementwise_pow_0) + del elementwise_pow_0, full_10 + + # pd_op.full: (1xf64) <- () + full_11 = paddle._C_ops.full( + [1], float("9"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_12 = paddle._C_ops.full( + [1], float("-9"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_13 = paddle._C_ops.full( + [1], float("-1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (18xf32) <- (1xf64, 1xf64, 1xf64) + arange_1 = paddle.arange(full_11, full_12, full_13, dtype="float32") + del full_12, full_13 + + # builtin.combine: ([18xf32, 384xf32]) <- (18xf32, 384xf32) + combine_0 = [arange_1, divide_0] + del arange_1, divide_0 + + # pd_op.einsum: (18x384xf32, [0xf32, 0xf32], [18xf32, 384xf32]) <- ([18xf32, 384xf32]) + einsum_0, einsum_1, einsum_2 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_0, "i,d->id"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_0 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_0, + split_1, + ) = einsum_1 + del einsum_1 + + # builtin.split: (18xf32, 384xf32) <- ([18xf32, 384xf32]) + ( + split_2, + split_3, + ) = einsum_2 + del einsum_2 + + # pd_op.sin: (18x384xf32) <- (18x384xf32) + sin_0 = paddle._C_ops.sin(einsum_0) + + # pd_op.cos: (18x384xf32) <- (18x384xf32) + cos_0 = paddle._C_ops.cos(einsum_0) + del einsum_0 + + # pd_op.full: (1xi32) <- () + full_14 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([18x384xf32, 18x384xf32]) <- (18x384xf32, 18x384xf32) + combine_1 = [sin_0, cos_0] + del cos_0, sin_0 + + # pd_op.concat: (18x768xf32) <- ([18x384xf32, 18x384xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_1, full_14) + del combine_1, full_14 + + # pd_op.unsqueeze: (18x1x768xf32) <- (18x768xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(concat_0, full_int_array_3) + del concat_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 1, -1] + + # pd_op.expand: (18x1x768xf32) <- (18x1x768xf32, 3xi64) + expand_0 = paddle._C_ops.expand(unsqueeze_5, full_int_array_4) + del full_int_array_4, unsqueeze_5 + + # pd_op.dropout: (18x1x768xf32, 18x1x768xui8) <- (18x1x768xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + expand_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del expand_0 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_407, False, False) + del parameter_407 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_5 = [9, 1, 12, 64] + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_0, full_int_array_5) + del matmul_0 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_406, False, False) + del parameter_406 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_1, full_int_array_5) + del matmul_1 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_405, False, False) + del parameter_405 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_2, full_int_array_5) + del matmul_2 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(dropout_2, parameter_403, False, False) + del parameter_403 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_6 = [18, -1, 12, 64] + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(matmul_3, full_int_array_6) + del matmul_3 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_1 = paddle._C_ops.add(reshape_0, parameter_400) + del parameter_400 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_2 = [add_1, reshape_1] + del add_1, reshape_1 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_3, einsum_4, einsum_5 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_2, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_2 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_4, + split_5, + ) = einsum_4 + del einsum_4 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_6, + split_7, + ) = einsum_5 + del einsum_5 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_2 = paddle._C_ops.add(reshape_0, parameter_402) + del parameter_402 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_3 = [add_2, reshape_3] + del add_2, reshape_3 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_6, einsum_7, einsum_8 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_3, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_3 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_8, + split_9, + ) = einsum_7 + del einsum_7 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_10, + split_11, + ) = einsum_8 + del einsum_8 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_7 = [1, 12, 18, 9] + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(einsum_6, full_int_array_7) + del einsum_6 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [2147483647] + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + reshape_4, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_4 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, 12, 9, 17] + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(slice_0, full_int_array_9) + del slice_0 + + # pd_op.full: (1xf64) <- () + full_15 = paddle._C_ops.full( + [1], float("1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (9xi64) <- (1xf64, 1xf64, 1xf64) + arange_2 = paddle.arange(full_5, full_11, full_15, dtype="int64") + del full_11, full_15, full_5 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_0 = paddle._C_ops.index_select(reshape_5, arange_2, 3) + del reshape_5 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_3 = paddle._C_ops.add(reshape_0, parameter_401) + del parameter_401, reshape_0 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_4 = [add_3, parameter_399] + del add_3, parameter_399 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_9, einsum_10, einsum_11 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_4, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_4 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_12, + split_13, + ) = einsum_10 + del einsum_10 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_14, + split_15, + ) = einsum_11 + del einsum_11 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_5 = [cast_5, einsum_9] + del einsum_9 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_12, einsum_13, einsum_14 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_5, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_5 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_16, + split_17, + ) = einsum_13 + del einsum_13 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_18, + split_19, + ) = einsum_14 + del einsum_14 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_4 = paddle._C_ops.add(einsum_3, index_select_0) + del einsum_3, index_select_0 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_5 = paddle._C_ops.add(add_4, einsum_12) + del add_4, einsum_12 + + # pd_op.full: (1xf32) <- () + full_16 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_3 = paddle._C_ops.scale(add_5, full_16, float("0"), True) + del add_5 + + # pd_op.transpose: (1x1x9x9xf32) <- (9x9x1x1xf32) + transpose_4 = paddle._C_ops.transpose(cast_3, [2, 3, 0, 1]) + del cast_3 + + # pd_op.full: (1xf32) <- () + full_17 = paddle._C_ops.full( + [1], float("1e+30"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x9x9xf32) <- (1x1x9x9xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_4, full_17, float("0"), True) + del full_17, transpose_4 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_0 = paddle._C_ops.subtract(scale_3, scale_4) + del scale_3 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_0 = paddle._C_ops.softmax(subtract_0, 3) + del subtract_0 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_6 = [dropout_4, reshape_2] + del dropout_4, reshape_2 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_15, einsum_16, einsum_17 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_6, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_6 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_20, + split_21, + ) = einsum_16 + del einsum_16 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_22, + split_23, + ) = einsum_17 + del einsum_17 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_10 = [9, 1, 768] + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_6 = paddle._C_ops.reshape(einsum_15, full_int_array_10) + del einsum_15 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_7 = [reshape_6, parameter_404] + del parameter_404, reshape_6 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_18, einsum_19, einsum_20 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_7, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_7 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_24, + split_25, + ) = einsum_19 + del einsum_19 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_26, + split_27, + ) = einsum_20 + del einsum_20 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_18 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_6 = paddle._C_ops.add(dropout_6, dropout_0) + del dropout_0, dropout_6 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_398, parameter_397, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_6, parameter_397, parameter_398 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_0, parameter_394, False, False) + del parameter_394 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_7 = paddle._C_ops.add(matmul_4, parameter_393) + del matmul_4, parameter_393 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_0 = paddle._C_ops.relu(add_7) + del add_7 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_0 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_5 = paddle._C_ops.matmul(dropout_8, parameter_392, False, False) + del dropout_8, parameter_392 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_8 = paddle._C_ops.add(matmul_5, parameter_391) + del matmul_5, parameter_391 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_8 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_9 = paddle._C_ops.add(dropout_10, layer_norm_0) + del dropout_10, layer_norm_0 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_9, parameter_396, parameter_395, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_9, parameter_395, parameter_396 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_3, parameter_390, False, False) + del parameter_390 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(matmul_6, full_int_array_5) + del matmul_6 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_389, False, False) + del parameter_389 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_7, full_int_array_5) + del matmul_7 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_3, parameter_388, False, False) + del parameter_388 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_8, full_int_array_5) + del matmul_8 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(dropout_2, parameter_386, False, False) + del parameter_386 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_9, full_int_array_6) + del matmul_9 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_10 = paddle._C_ops.add(reshape_7, parameter_383) + del parameter_383 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_8 = [add_10, reshape_8] + del add_10, reshape_8 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_21, einsum_22, einsum_23 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_8, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_8 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_28, + split_29, + ) = einsum_22 + del einsum_22 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_30, + split_31, + ) = einsum_23 + del einsum_23 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_11 = paddle._C_ops.add(reshape_7, parameter_385) + del parameter_385 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_9 = [add_11, reshape_10] + del add_11, reshape_10 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_24, einsum_25, einsum_26 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_9, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_9 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_32, + split_33, + ) = einsum_25 + del einsum_25 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_34, + split_35, + ) = einsum_26 + del einsum_26 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(einsum_24, full_int_array_7) + del einsum_24 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + reshape_11, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_11 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(slice_1, full_int_array_9) + del slice_1 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_1 = paddle._C_ops.index_select(reshape_12, arange_2, 3) + del reshape_12 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_12 = paddle._C_ops.add(reshape_7, parameter_384) + del parameter_384, reshape_7 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_10 = [add_12, parameter_382] + del add_12, parameter_382 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_27, einsum_28, einsum_29 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_10, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_10 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_36, + split_37, + ) = einsum_28 + del einsum_28 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_38, + split_39, + ) = einsum_29 + del einsum_29 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_11 = [cast_5, einsum_27] + del einsum_27 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_30, einsum_31, einsum_32 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_11, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_11 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_40, + split_41, + ) = einsum_31 + del einsum_31 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_42, + split_43, + ) = einsum_32 + del einsum_32 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_13 = paddle._C_ops.add(einsum_21, index_select_1) + del einsum_21, index_select_1 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_14 = paddle._C_ops.add(add_13, einsum_30) + del add_13, einsum_30 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_5 = paddle._C_ops.scale(add_14, full_16, float("0"), True) + del add_14 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_1 = paddle._C_ops.subtract(scale_5, scale_4) + del scale_5 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_1 = paddle._C_ops.softmax(subtract_1, 3) + del subtract_1 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_12 = [dropout_12, reshape_9] + del dropout_12, reshape_9 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_33, einsum_34, einsum_35 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_12, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_12 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_44, + split_45, + ) = einsum_34 + del einsum_34 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_46, + split_47, + ) = einsum_35 + del einsum_35 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_13 = paddle._C_ops.reshape(einsum_33, full_int_array_10) + del einsum_33 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_13 = [reshape_13, parameter_387] + del parameter_387, reshape_13 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_36, einsum_37, einsum_38 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_13, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_13 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_48, + split_49, + ) = einsum_37 + del einsum_37 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_50, + split_51, + ) = einsum_38 + del einsum_38 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_36, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_36 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_15 = paddle._C_ops.add(dropout_14, layer_norm_3) + del dropout_14, layer_norm_3 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_15, parameter_381, parameter_380, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_15, parameter_380, parameter_381 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_377, False, False) + del parameter_377 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_16 = paddle._C_ops.add(matmul_10, parameter_376) + del matmul_10, parameter_376 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_1 = paddle._C_ops.relu(add_16) + del add_16 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_1 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_11 = paddle._C_ops.matmul(dropout_16, parameter_375, False, False) + del dropout_16, parameter_375 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_11, parameter_374) + del matmul_11, parameter_374 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_17 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_18 = paddle._C_ops.add(dropout_18, layer_norm_6) + del dropout_18, layer_norm_6 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_379, parameter_378, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18, parameter_378, parameter_379 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_9, parameter_373, False, False) + del parameter_373 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_12, full_int_array_5) + del matmul_12 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_13 = paddle._C_ops.matmul(layer_norm_9, parameter_372, False, False) + del parameter_372 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(matmul_13, full_int_array_5) + del matmul_13 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_9, parameter_371, False, False) + del parameter_371 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_14, full_int_array_5) + del matmul_14 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_15 = paddle._C_ops.matmul(dropout_2, parameter_369, False, False) + del parameter_369 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_15, full_int_array_6) + del matmul_15 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_19 = paddle._C_ops.add(reshape_14, parameter_366) + del parameter_366 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_14 = [add_19, reshape_15] + del add_19, reshape_15 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_39, einsum_40, einsum_41 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_14, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_14 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_52, + split_53, + ) = einsum_40 + del einsum_40 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_54, + split_55, + ) = einsum_41 + del einsum_41 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_20 = paddle._C_ops.add(reshape_14, parameter_368) + del parameter_368 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_15 = [add_20, reshape_17] + del add_20, reshape_17 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_42, einsum_43, einsum_44 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_15, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_15 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_56, + split_57, + ) = einsum_43 + del einsum_43 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_58, + split_59, + ) = einsum_44 + del einsum_44 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(einsum_42, full_int_array_7) + del einsum_42 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + reshape_18, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_18 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_19 = paddle._C_ops.reshape(slice_2, full_int_array_9) + del slice_2 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_2 = paddle._C_ops.index_select(reshape_19, arange_2, 3) + del reshape_19 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_21 = paddle._C_ops.add(reshape_14, parameter_367) + del parameter_367, reshape_14 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_16 = [add_21, parameter_365] + del add_21, parameter_365 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_45, einsum_46, einsum_47 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_16, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_16 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_60, + split_61, + ) = einsum_46 + del einsum_46 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_62, + split_63, + ) = einsum_47 + del einsum_47 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_17 = [cast_5, einsum_45] + del einsum_45 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_48, einsum_49, einsum_50 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_17, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_17 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_64, + split_65, + ) = einsum_49 + del einsum_49 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_66, + split_67, + ) = einsum_50 + del einsum_50 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_22 = paddle._C_ops.add(einsum_39, index_select_2) + del einsum_39, index_select_2 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_23 = paddle._C_ops.add(add_22, einsum_48) + del add_22, einsum_48 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_6 = paddle._C_ops.scale(add_23, full_16, float("0"), True) + del add_23 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_2 = paddle._C_ops.subtract(scale_6, scale_4) + del scale_6 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_2 = paddle._C_ops.softmax(subtract_2, 3) + del subtract_2 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_18 = [dropout_20, reshape_16] + del dropout_20, reshape_16 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_51, einsum_52, einsum_53 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_18, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_18 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_68, + split_69, + ) = einsum_52 + del einsum_52 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_70, + split_71, + ) = einsum_53 + del einsum_53 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_20 = paddle._C_ops.reshape(einsum_51, full_int_array_10) + del einsum_51 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_19 = [reshape_20, parameter_370] + del parameter_370, reshape_20 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_54, einsum_55, einsum_56 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_19, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_19 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_72, + split_73, + ) = einsum_55 + del einsum_55 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_74, + split_75, + ) = einsum_56 + del einsum_56 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_54, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_54 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_24 = paddle._C_ops.add(dropout_22, layer_norm_9) + del dropout_22, layer_norm_9 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_24, parameter_364, parameter_363, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_24, parameter_363, parameter_364 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_12, parameter_360, False, False) + del parameter_360 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_25 = paddle._C_ops.add(matmul_16, parameter_359) + del matmul_16, parameter_359 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_2 = paddle._C_ops.relu(add_25) + del add_25 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_2 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_17 = paddle._C_ops.matmul(dropout_24, parameter_358, False, False) + del dropout_24, parameter_358 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_17, parameter_357) + del matmul_17, parameter_357 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_26, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_26 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_27 = paddle._C_ops.add(dropout_26, layer_norm_12) + del dropout_26, layer_norm_12 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_27, parameter_362, parameter_361, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_27, parameter_361, parameter_362 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_15, parameter_356, False, False) + del parameter_356 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_18, full_int_array_5) + del matmul_18 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_15, parameter_355, False, False) + del parameter_355 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_19, full_int_array_5) + del matmul_19 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_20 = paddle._C_ops.matmul(layer_norm_15, parameter_354, False, False) + del parameter_354 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_23 = paddle._C_ops.reshape(matmul_20, full_int_array_5) + del matmul_20 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_21 = paddle._C_ops.matmul(dropout_2, parameter_352, False, False) + del parameter_352 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_21, full_int_array_6) + del matmul_21 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_28 = paddle._C_ops.add(reshape_21, parameter_349) + del parameter_349 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_20 = [add_28, reshape_22] + del add_28, reshape_22 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_57, einsum_58, einsum_59 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_20, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_20 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_76, + split_77, + ) = einsum_58 + del einsum_58 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_78, + split_79, + ) = einsum_59 + del einsum_59 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_29 = paddle._C_ops.add(reshape_21, parameter_351) + del parameter_351 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_21 = [add_29, reshape_24] + del add_29, reshape_24 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_60, einsum_61, einsum_62 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_21, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_21 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_80, + split_81, + ) = einsum_61 + del einsum_61 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_82, + split_83, + ) = einsum_62 + del einsum_62 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(einsum_60, full_int_array_7) + del einsum_60 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + reshape_25, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_25 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(slice_3, full_int_array_9) + del slice_3 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_3 = paddle._C_ops.index_select(reshape_26, arange_2, 3) + del reshape_26 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_30 = paddle._C_ops.add(reshape_21, parameter_350) + del parameter_350, reshape_21 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_22 = [add_30, parameter_348] + del add_30, parameter_348 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_63, einsum_64, einsum_65 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_22, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_22 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_84, + split_85, + ) = einsum_64 + del einsum_64 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_86, + split_87, + ) = einsum_65 + del einsum_65 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_23 = [cast_5, einsum_63] + del einsum_63 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_66, einsum_67, einsum_68 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_23, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_23 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_88, + split_89, + ) = einsum_67 + del einsum_67 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_90, + split_91, + ) = einsum_68 + del einsum_68 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_31 = paddle._C_ops.add(einsum_57, index_select_3) + del einsum_57, index_select_3 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_32 = paddle._C_ops.add(add_31, einsum_66) + del add_31, einsum_66 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_7 = paddle._C_ops.scale(add_32, full_16, float("0"), True) + del add_32 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_3 = paddle._C_ops.subtract(scale_7, scale_4) + del scale_7 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_3 = paddle._C_ops.softmax(subtract_3, 3) + del subtract_3 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_24 = [dropout_28, reshape_23] + del dropout_28, reshape_23 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_69, einsum_70, einsum_71 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_24, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_24 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_92, + split_93, + ) = einsum_70 + del einsum_70 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_94, + split_95, + ) = einsum_71 + del einsum_71 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(einsum_69, full_int_array_10) + del einsum_69 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_25 = [reshape_27, parameter_353] + del parameter_353, reshape_27 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_72, einsum_73, einsum_74 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_25, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_25 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_96, + split_97, + ) = einsum_73 + del einsum_73 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_98, + split_99, + ) = einsum_74 + del einsum_74 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_72, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_72 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_33 = paddle._C_ops.add(dropout_30, layer_norm_15) + del dropout_30, layer_norm_15 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_33, parameter_347, parameter_346, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_33, parameter_346, parameter_347 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_18, parameter_343, False, False) + del parameter_343 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_34 = paddle._C_ops.add(matmul_22, parameter_342) + del matmul_22, parameter_342 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_3 = paddle._C_ops.relu(add_34) + del add_34 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_3 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_23 = paddle._C_ops.matmul(dropout_32, parameter_341, False, False) + del dropout_32, parameter_341 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_35 = paddle._C_ops.add(matmul_23, parameter_340) + del matmul_23, parameter_340 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_35, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_35 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_36 = paddle._C_ops.add(dropout_34, layer_norm_18) + del dropout_34, layer_norm_18 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_36, parameter_345, parameter_344, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_36, parameter_344, parameter_345 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_21, parameter_339, False, False) + del parameter_339 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_24, full_int_array_5) + del matmul_24 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_21, parameter_338, False, False) + del parameter_338 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_25, full_int_array_5) + del matmul_25 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_21, parameter_337, False, False) + del parameter_337 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_26, full_int_array_5) + del matmul_26 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(dropout_2, parameter_335, False, False) + del parameter_335 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(matmul_27, full_int_array_6) + del matmul_27 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_37 = paddle._C_ops.add(reshape_28, parameter_332) + del parameter_332 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_26 = [add_37, reshape_29] + del add_37, reshape_29 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_75, einsum_76, einsum_77 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_26, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_26 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_100, + split_101, + ) = einsum_76 + del einsum_76 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_102, + split_103, + ) = einsum_77 + del einsum_77 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_38 = paddle._C_ops.add(reshape_28, parameter_334) + del parameter_334 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_27 = [add_38, reshape_31] + del add_38, reshape_31 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_78, einsum_79, einsum_80 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_27, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_27 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_104, + split_105, + ) = einsum_79 + del einsum_79 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_106, + split_107, + ) = einsum_80 + del einsum_80 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(einsum_78, full_int_array_7) + del einsum_78 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + reshape_32, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_32 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(slice_4, full_int_array_9) + del slice_4 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_4 = paddle._C_ops.index_select(reshape_33, arange_2, 3) + del reshape_33 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_39 = paddle._C_ops.add(reshape_28, parameter_333) + del parameter_333, reshape_28 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_28 = [add_39, parameter_331] + del add_39, parameter_331 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_81, einsum_82, einsum_83 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_28, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_28 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_108, + split_109, + ) = einsum_82 + del einsum_82 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_110, + split_111, + ) = einsum_83 + del einsum_83 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_29 = [cast_5, einsum_81] + del einsum_81 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_84, einsum_85, einsum_86 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_29, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_29 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_112, + split_113, + ) = einsum_85 + del einsum_85 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_114, + split_115, + ) = einsum_86 + del einsum_86 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_40 = paddle._C_ops.add(einsum_75, index_select_4) + del einsum_75, index_select_4 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_41 = paddle._C_ops.add(add_40, einsum_84) + del add_40, einsum_84 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_8 = paddle._C_ops.scale(add_41, full_16, float("0"), True) + del add_41 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_4 = paddle._C_ops.subtract(scale_8, scale_4) + del scale_8 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_4 = paddle._C_ops.softmax(subtract_4, 3) + del subtract_4 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_30 = [dropout_36, reshape_30] + del dropout_36, reshape_30 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_87, einsum_88, einsum_89 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_30, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_30 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_116, + split_117, + ) = einsum_88 + del einsum_88 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_118, + split_119, + ) = einsum_89 + del einsum_89 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_34 = paddle._C_ops.reshape(einsum_87, full_int_array_10) + del einsum_87 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_31 = [reshape_34, parameter_336] + del parameter_336, reshape_34 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_90, einsum_91, einsum_92 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_31, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_31 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_120, + split_121, + ) = einsum_91 + del einsum_91 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_122, + split_123, + ) = einsum_92 + del einsum_92 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_90, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_90 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_42 = paddle._C_ops.add(dropout_38, layer_norm_21) + del dropout_38, layer_norm_21 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_330, parameter_329, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_42, parameter_329, parameter_330 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_28 = paddle._C_ops.matmul(layer_norm_24, parameter_326, False, False) + del parameter_326 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_43 = paddle._C_ops.add(matmul_28, parameter_325) + del matmul_28, parameter_325 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_4 = paddle._C_ops.relu(add_43) + del add_43 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_4 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_29 = paddle._C_ops.matmul(dropout_40, parameter_324, False, False) + del dropout_40, parameter_324 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_29, parameter_323) + del matmul_29, parameter_323 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_44, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_44 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_45 = paddle._C_ops.add(dropout_42, layer_norm_24) + del dropout_42, layer_norm_24 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_328, parameter_327, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_45, parameter_327, parameter_328 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_27, parameter_322, False, False) + del parameter_322 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(matmul_30, full_int_array_5) + del matmul_30 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_27, parameter_321, False, False) + del parameter_321 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_31, full_int_array_5) + del matmul_31 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_27, parameter_320, False, False) + del parameter_320 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_32, full_int_array_5) + del matmul_32 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(dropout_2, parameter_318, False, False) + del parameter_318 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_33, full_int_array_6) + del matmul_33 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_46 = paddle._C_ops.add(reshape_35, parameter_315) + del parameter_315 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_32 = [add_46, reshape_36] + del add_46, reshape_36 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_93, einsum_94, einsum_95 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_32, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_32 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_124, + split_125, + ) = einsum_94 + del einsum_94 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_126, + split_127, + ) = einsum_95 + del einsum_95 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_47 = paddle._C_ops.add(reshape_35, parameter_317) + del parameter_317 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_33 = [add_47, reshape_38] + del add_47, reshape_38 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_96, einsum_97, einsum_98 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_33, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_33 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_128, + split_129, + ) = einsum_97 + del einsum_97 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_130, + split_131, + ) = einsum_98 + del einsum_98 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(einsum_96, full_int_array_7) + del einsum_96 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + reshape_39, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_39 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(slice_5, full_int_array_9) + del slice_5 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_5 = paddle._C_ops.index_select(reshape_40, arange_2, 3) + del reshape_40 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_48 = paddle._C_ops.add(reshape_35, parameter_316) + del parameter_316, reshape_35 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_34 = [add_48, parameter_314] + del add_48, parameter_314 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_99, einsum_100, einsum_101 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_34, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_34 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_132, + split_133, + ) = einsum_100 + del einsum_100 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_134, + split_135, + ) = einsum_101 + del einsum_101 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_35 = [cast_5, einsum_99] + del einsum_99 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_102, einsum_103, einsum_104 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_35, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_35 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_136, + split_137, + ) = einsum_103 + del einsum_103 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_138, + split_139, + ) = einsum_104 + del einsum_104 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_49 = paddle._C_ops.add(einsum_93, index_select_5) + del einsum_93, index_select_5 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_50 = paddle._C_ops.add(add_49, einsum_102) + del add_49, einsum_102 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_9 = paddle._C_ops.scale(add_50, full_16, float("0"), True) + del add_50 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_5 = paddle._C_ops.subtract(scale_9, scale_4) + del scale_9 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_5 = paddle._C_ops.softmax(subtract_5, 3) + del subtract_5 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_36 = [dropout_44, reshape_37] + del dropout_44, reshape_37 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_105, einsum_106, einsum_107 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_36, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_36 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_140, + split_141, + ) = einsum_106 + del einsum_106 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_142, + split_143, + ) = einsum_107 + del einsum_107 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(einsum_105, full_int_array_10) + del einsum_105 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_37 = [reshape_41, parameter_319] + del parameter_319, reshape_41 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_108, einsum_109, einsum_110 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_37, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_37 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_144, + split_145, + ) = einsum_109 + del einsum_109 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_146, + split_147, + ) = einsum_110 + del einsum_110 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_108, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_108 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_51 = paddle._C_ops.add(dropout_46, layer_norm_27) + del dropout_46, layer_norm_27 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_51, parameter_313, parameter_312, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_51, parameter_312, parameter_313 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_30, parameter_309, False, False) + del parameter_309 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_52 = paddle._C_ops.add(matmul_34, parameter_308) + del matmul_34, parameter_308 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_5 = paddle._C_ops.relu(add_52) + del add_52 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_5 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_35 = paddle._C_ops.matmul(dropout_48, parameter_307, False, False) + del dropout_48, parameter_307 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_53 = paddle._C_ops.add(matmul_35, parameter_306) + del matmul_35, parameter_306 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_53, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_53 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_54 = paddle._C_ops.add(dropout_50, layer_norm_30) + del dropout_50, layer_norm_30 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_54, parameter_311, parameter_310, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_54, parameter_310, parameter_311 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_33, parameter_305, False, False) + del parameter_305 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_36, full_int_array_5) + del matmul_36 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_37 = paddle._C_ops.matmul(layer_norm_33, parameter_304, False, False) + del parameter_304 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(matmul_37, full_int_array_5) + del matmul_37 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_33, parameter_303, False, False) + del parameter_303 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_38, full_int_array_5) + del matmul_38 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_39 = paddle._C_ops.matmul(dropout_2, parameter_301, False, False) + del parameter_301 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_39, full_int_array_6) + del matmul_39 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_55 = paddle._C_ops.add(reshape_42, parameter_298) + del parameter_298 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_38 = [add_55, reshape_43] + del add_55, reshape_43 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_111, einsum_112, einsum_113 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_38, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_38 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_148, + split_149, + ) = einsum_112 + del einsum_112 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_150, + split_151, + ) = einsum_113 + del einsum_113 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_56 = paddle._C_ops.add(reshape_42, parameter_300) + del parameter_300 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_39 = [add_56, reshape_45] + del add_56, reshape_45 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_114, einsum_115, einsum_116 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_39, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_39 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_152, + split_153, + ) = einsum_115 + del einsum_115 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_154, + split_155, + ) = einsum_116 + del einsum_116 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(einsum_114, full_int_array_7) + del einsum_114 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + reshape_46, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_46 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(slice_6, full_int_array_9) + del slice_6 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_6 = paddle._C_ops.index_select(reshape_47, arange_2, 3) + del reshape_47 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_57 = paddle._C_ops.add(reshape_42, parameter_299) + del parameter_299, reshape_42 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_40 = [add_57, parameter_297] + del add_57, parameter_297 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_117, einsum_118, einsum_119 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_40, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_40 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_156, + split_157, + ) = einsum_118 + del einsum_118 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_158, + split_159, + ) = einsum_119 + del einsum_119 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_41 = [cast_5, einsum_117] + del einsum_117 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_120, einsum_121, einsum_122 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_41, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_41 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_160, + split_161, + ) = einsum_121 + del einsum_121 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_162, + split_163, + ) = einsum_122 + del einsum_122 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_58 = paddle._C_ops.add(einsum_111, index_select_6) + del einsum_111, index_select_6 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_59 = paddle._C_ops.add(add_58, einsum_120) + del add_58, einsum_120 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_10 = paddle._C_ops.scale(add_59, full_16, float("0"), True) + del add_59 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_6 = paddle._C_ops.subtract(scale_10, scale_4) + del scale_10 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_6 = paddle._C_ops.softmax(subtract_6, 3) + del subtract_6 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_42 = [dropout_52, reshape_44] + del dropout_52, reshape_44 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_123, einsum_124, einsum_125 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_42, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_42 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_164, + split_165, + ) = einsum_124 + del einsum_124 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_166, + split_167, + ) = einsum_125 + del einsum_125 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(einsum_123, full_int_array_10) + del einsum_123 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_43 = [reshape_48, parameter_302] + del parameter_302, reshape_48 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_126, einsum_127, einsum_128 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_43, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_43 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_168, + split_169, + ) = einsum_127 + del einsum_127 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_170, + split_171, + ) = einsum_128 + del einsum_128 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_126, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_126 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_60 = paddle._C_ops.add(dropout_54, layer_norm_33) + del dropout_54, layer_norm_33 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_60, parameter_296, parameter_295, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_60, parameter_295, parameter_296 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_36, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_61 = paddle._C_ops.add(matmul_40, parameter_291) + del matmul_40, parameter_291 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_6 = paddle._C_ops.relu(add_61) + del add_61 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_6 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_41 = paddle._C_ops.matmul(dropout_56, parameter_290, False, False) + del dropout_56, parameter_290 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_62 = paddle._C_ops.add(matmul_41, parameter_289) + del matmul_41, parameter_289 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_62, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_62 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_63 = paddle._C_ops.add(dropout_58, layer_norm_36) + del dropout_58, layer_norm_36 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_63, parameter_294, parameter_293, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_63, parameter_293, parameter_294 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_39, parameter_288, False, False) + del parameter_288 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(matmul_42, full_int_array_5) + del matmul_42 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_39, parameter_287, False, False) + del parameter_287 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(matmul_43, full_int_array_5) + del matmul_43 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_44 = paddle._C_ops.matmul(layer_norm_39, parameter_286, False, False) + del parameter_286 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_51 = paddle._C_ops.reshape(matmul_44, full_int_array_5) + del matmul_44 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_45 = paddle._C_ops.matmul(dropout_2, parameter_284, False, False) + del parameter_284 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(matmul_45, full_int_array_6) + del matmul_45 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_64 = paddle._C_ops.add(reshape_49, parameter_281) + del parameter_281 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_44 = [add_64, reshape_50] + del add_64, reshape_50 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_129, einsum_130, einsum_131 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_44, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_44 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_172, + split_173, + ) = einsum_130 + del einsum_130 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_174, + split_175, + ) = einsum_131 + del einsum_131 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_65 = paddle._C_ops.add(reshape_49, parameter_283) + del parameter_283 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_45 = [add_65, reshape_52] + del add_65, reshape_52 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_132, einsum_133, einsum_134 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_45, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_45 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_176, + split_177, + ) = einsum_133 + del einsum_133 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_178, + split_179, + ) = einsum_134 + del einsum_134 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(einsum_132, full_int_array_7) + del einsum_132 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + reshape_53, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_53 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(slice_7, full_int_array_9) + del slice_7 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_7 = paddle._C_ops.index_select(reshape_54, arange_2, 3) + del reshape_54 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_66 = paddle._C_ops.add(reshape_49, parameter_282) + del parameter_282, reshape_49 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_46 = [add_66, parameter_280] + del add_66, parameter_280 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_135, einsum_136, einsum_137 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_46, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_46 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_180, + split_181, + ) = einsum_136 + del einsum_136 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_182, + split_183, + ) = einsum_137 + del einsum_137 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_47 = [cast_5, einsum_135] + del einsum_135 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_138, einsum_139, einsum_140 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_47, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_47 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_184, + split_185, + ) = einsum_139 + del einsum_139 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_186, + split_187, + ) = einsum_140 + del einsum_140 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_67 = paddle._C_ops.add(einsum_129, index_select_7) + del einsum_129, index_select_7 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_68 = paddle._C_ops.add(add_67, einsum_138) + del add_67, einsum_138 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_11 = paddle._C_ops.scale(add_68, full_16, float("0"), True) + del add_68 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_7 = paddle._C_ops.subtract(scale_11, scale_4) + del scale_11 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_7 = paddle._C_ops.softmax(subtract_7, 3) + del subtract_7 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_48 = [dropout_60, reshape_51] + del dropout_60, reshape_51 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_141, einsum_142, einsum_143 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_48, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_48 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_188, + split_189, + ) = einsum_142 + del einsum_142 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_190, + split_191, + ) = einsum_143 + del einsum_143 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(einsum_141, full_int_array_10) + del einsum_141 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_49 = [reshape_55, parameter_285] + del parameter_285, reshape_55 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_144, einsum_145, einsum_146 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_49, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_49 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_192, + split_193, + ) = einsum_145 + del einsum_145 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_194, + split_195, + ) = einsum_146 + del einsum_146 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_144, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_144 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_69 = paddle._C_ops.add(dropout_62, layer_norm_39) + del dropout_62, layer_norm_39 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_69, parameter_279, parameter_278, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_69, parameter_278, parameter_279 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_42, parameter_275, False, False) + del parameter_275 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_70 = paddle._C_ops.add(matmul_46, parameter_274) + del matmul_46, parameter_274 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_7 = paddle._C_ops.relu(add_70) + del add_70 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_7 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_47 = paddle._C_ops.matmul(dropout_64, parameter_273, False, False) + del dropout_64, parameter_273 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_71 = paddle._C_ops.add(matmul_47, parameter_272) + del matmul_47, parameter_272 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_71, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_71 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_72 = paddle._C_ops.add(dropout_66, layer_norm_42) + del dropout_66, layer_norm_42 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_72, parameter_277, parameter_276, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_72, parameter_276, parameter_277 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_45, parameter_271, False, False) + del parameter_271 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(matmul_48, full_int_array_5) + del matmul_48 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_45, parameter_270, False, False) + del parameter_270 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(matmul_49, full_int_array_5) + del matmul_49 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_45, parameter_269, False, False) + del parameter_269 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(matmul_50, full_int_array_5) + del matmul_50 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(dropout_2, parameter_267, False, False) + del parameter_267 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_59 = paddle._C_ops.reshape(matmul_51, full_int_array_6) + del matmul_51 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_73 = paddle._C_ops.add(reshape_56, parameter_264) + del parameter_264 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_50 = [add_73, reshape_57] + del add_73, reshape_57 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_147, einsum_148, einsum_149 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_50, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_50 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_196, + split_197, + ) = einsum_148 + del einsum_148 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_198, + split_199, + ) = einsum_149 + del einsum_149 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_74 = paddle._C_ops.add(reshape_56, parameter_266) + del parameter_266 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_51 = [add_74, reshape_59] + del add_74, reshape_59 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_150, einsum_151, einsum_152 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_51, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_51 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_200, + split_201, + ) = einsum_151 + del einsum_151 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_202, + split_203, + ) = einsum_152 + del einsum_152 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(einsum_150, full_int_array_7) + del einsum_150 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + reshape_60, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_60 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(slice_8, full_int_array_9) + del slice_8 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_8 = paddle._C_ops.index_select(reshape_61, arange_2, 3) + del reshape_61 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_75 = paddle._C_ops.add(reshape_56, parameter_265) + del parameter_265, reshape_56 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_52 = [add_75, parameter_263] + del add_75, parameter_263 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_153, einsum_154, einsum_155 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_52, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_52 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_204, + split_205, + ) = einsum_154 + del einsum_154 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_206, + split_207, + ) = einsum_155 + del einsum_155 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_53 = [cast_5, einsum_153] + del einsum_153 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_156, einsum_157, einsum_158 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_53, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_53 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_208, + split_209, + ) = einsum_157 + del einsum_157 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_210, + split_211, + ) = einsum_158 + del einsum_158 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_76 = paddle._C_ops.add(einsum_147, index_select_8) + del einsum_147, index_select_8 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_77 = paddle._C_ops.add(add_76, einsum_156) + del add_76, einsum_156 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_12 = paddle._C_ops.scale(add_77, full_16, float("0"), True) + del add_77 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_8 = paddle._C_ops.subtract(scale_12, scale_4) + del scale_12 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_8 = paddle._C_ops.softmax(subtract_8, 3) + del subtract_8 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_54 = [dropout_68, reshape_58] + del dropout_68, reshape_58 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_159, einsum_160, einsum_161 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_54, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_54 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_212, + split_213, + ) = einsum_160 + del einsum_160 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_214, + split_215, + ) = einsum_161 + del einsum_161 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_62 = paddle._C_ops.reshape(einsum_159, full_int_array_10) + del einsum_159 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_55 = [reshape_62, parameter_268] + del parameter_268, reshape_62 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_162, einsum_163, einsum_164 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_55, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_55 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_216, + split_217, + ) = einsum_163 + del einsum_163 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_218, + split_219, + ) = einsum_164 + del einsum_164 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_162, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_162 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_78 = paddle._C_ops.add(dropout_70, layer_norm_45) + del dropout_70, layer_norm_45 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_78, parameter_262, parameter_261, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_78, parameter_261, parameter_262 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_52 = paddle._C_ops.matmul(layer_norm_48, parameter_258, False, False) + del parameter_258 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_79 = paddle._C_ops.add(matmul_52, parameter_257) + del matmul_52, parameter_257 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_8 = paddle._C_ops.relu(add_79) + del add_79 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_8 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_53 = paddle._C_ops.matmul(dropout_72, parameter_256, False, False) + del dropout_72, parameter_256 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_53, parameter_255) + del matmul_53, parameter_255 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_80, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_80 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_81 = paddle._C_ops.add(dropout_74, layer_norm_48) + del dropout_74, layer_norm_48 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_260, parameter_259, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_81, parameter_259, parameter_260 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_51, parameter_254, False, False) + del parameter_254 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(matmul_54, full_int_array_5) + del matmul_54 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_51, parameter_253, False, False) + del parameter_253 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(matmul_55, full_int_array_5) + del matmul_55 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_51, parameter_252, False, False) + del parameter_252 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(matmul_56, full_int_array_5) + del matmul_56 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(dropout_2, parameter_250, False, False) + del parameter_250 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(matmul_57, full_int_array_6) + del matmul_57 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_82 = paddle._C_ops.add(reshape_63, parameter_247) + del parameter_247 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_56 = [add_82, reshape_64] + del add_82, reshape_64 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_165, einsum_166, einsum_167 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_56, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_56 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_220, + split_221, + ) = einsum_166 + del einsum_166 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_222, + split_223, + ) = einsum_167 + del einsum_167 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_83 = paddle._C_ops.add(reshape_63, parameter_249) + del parameter_249 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_57 = [add_83, reshape_66] + del add_83, reshape_66 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_168, einsum_169, einsum_170 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_57, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_57 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_224, + split_225, + ) = einsum_169 + del einsum_169 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_226, + split_227, + ) = einsum_170 + del einsum_170 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(einsum_168, full_int_array_7) + del einsum_168 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + reshape_67, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_67 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(slice_9, full_int_array_9) + del slice_9 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_9 = paddle._C_ops.index_select(reshape_68, arange_2, 3) + del reshape_68 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_84 = paddle._C_ops.add(reshape_63, parameter_248) + del parameter_248, reshape_63 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_58 = [add_84, parameter_246] + del add_84, parameter_246 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_171, einsum_172, einsum_173 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_58, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_58 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_228, + split_229, + ) = einsum_172 + del einsum_172 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_230, + split_231, + ) = einsum_173 + del einsum_173 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_59 = [cast_5, einsum_171] + del einsum_171 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_174, einsum_175, einsum_176 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_59, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_59 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_232, + split_233, + ) = einsum_175 + del einsum_175 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_234, + split_235, + ) = einsum_176 + del einsum_176 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_85 = paddle._C_ops.add(einsum_165, index_select_9) + del einsum_165, index_select_9 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_86 = paddle._C_ops.add(add_85, einsum_174) + del add_85, einsum_174 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_13 = paddle._C_ops.scale(add_86, full_16, float("0"), True) + del add_86 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_9 = paddle._C_ops.subtract(scale_13, scale_4) + del scale_13 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_9 = paddle._C_ops.softmax(subtract_9, 3) + del subtract_9 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_60 = [dropout_76, reshape_65] + del dropout_76, reshape_65 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_177, einsum_178, einsum_179 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_60, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_60 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_236, + split_237, + ) = einsum_178 + del einsum_178 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_238, + split_239, + ) = einsum_179 + del einsum_179 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(einsum_177, full_int_array_10) + del einsum_177 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_61 = [reshape_69, parameter_251] + del parameter_251, reshape_69 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_180, einsum_181, einsum_182 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_61, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_61 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_240, + split_241, + ) = einsum_181 + del einsum_181 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_242, + split_243, + ) = einsum_182 + del einsum_182 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_180, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_180 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_87 = paddle._C_ops.add(dropout_78, layer_norm_51) + del dropout_78, layer_norm_51 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_245, parameter_244, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_87, parameter_244, parameter_245 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_54, parameter_241, False, False) + del parameter_241 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_88 = paddle._C_ops.add(matmul_58, parameter_240) + del matmul_58, parameter_240 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_9 = paddle._C_ops.relu(add_88) + del add_88 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_9 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_59 = paddle._C_ops.matmul(dropout_80, parameter_239, False, False) + del dropout_80, parameter_239 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_89 = paddle._C_ops.add(matmul_59, parameter_238) + del matmul_59, parameter_238 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_89, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_89 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_90 = paddle._C_ops.add(dropout_82, layer_norm_54) + del dropout_82, layer_norm_54 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_243, parameter_242, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_90, parameter_242, parameter_243 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_57, parameter_237, False, False) + del parameter_237 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(matmul_60, full_int_array_5) + del matmul_60 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_61 = paddle._C_ops.matmul(layer_norm_57, parameter_236, False, False) + del parameter_236 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(matmul_61, full_int_array_5) + del matmul_61 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_57, parameter_235, False, False) + del parameter_235 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_72 = paddle._C_ops.reshape(matmul_62, full_int_array_5) + del matmul_62 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_63 = paddle._C_ops.matmul(dropout_2, parameter_233, False, False) + del parameter_233 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(matmul_63, full_int_array_6) + del matmul_63 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_91 = paddle._C_ops.add(reshape_70, parameter_230) + del parameter_230 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_62 = [add_91, reshape_71] + del add_91, reshape_71 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_183, einsum_184, einsum_185 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_62, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_62 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_244, + split_245, + ) = einsum_184 + del einsum_184 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_246, + split_247, + ) = einsum_185 + del einsum_185 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_92 = paddle._C_ops.add(reshape_70, parameter_232) + del parameter_232 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_63 = [add_92, reshape_73] + del add_92, reshape_73 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_186, einsum_187, einsum_188 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_63, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_63 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_248, + split_249, + ) = einsum_187 + del einsum_187 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_250, + split_251, + ) = einsum_188 + del einsum_188 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(einsum_186, full_int_array_7) + del einsum_186 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + reshape_74, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_74 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(slice_10, full_int_array_9) + del slice_10 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_10 = paddle._C_ops.index_select(reshape_75, arange_2, 3) + del reshape_75 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_93 = paddle._C_ops.add(reshape_70, parameter_231) + del parameter_231, reshape_70 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_64 = [add_93, parameter_229] + del add_93, parameter_229 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_189, einsum_190, einsum_191 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_64, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_64 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_252, + split_253, + ) = einsum_190 + del einsum_190 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_254, + split_255, + ) = einsum_191 + del einsum_191 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_65 = [cast_5, einsum_189] + del einsum_189 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_192, einsum_193, einsum_194 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_65, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_65 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_256, + split_257, + ) = einsum_193 + del einsum_193 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_258, + split_259, + ) = einsum_194 + del einsum_194 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_94 = paddle._C_ops.add(einsum_183, index_select_10) + del einsum_183, index_select_10 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_95 = paddle._C_ops.add(add_94, einsum_192) + del add_94, einsum_192 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_14 = paddle._C_ops.scale(add_95, full_16, float("0"), True) + del add_95 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_10 = paddle._C_ops.subtract(scale_14, scale_4) + del scale_14 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_10 = paddle._C_ops.softmax(subtract_10, 3) + del subtract_10 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_66 = [dropout_84, reshape_72] + del dropout_84, reshape_72 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_195, einsum_196, einsum_197 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_66, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_66 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_260, + split_261, + ) = einsum_196 + del einsum_196 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_262, + split_263, + ) = einsum_197 + del einsum_197 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(einsum_195, full_int_array_10) + del einsum_195 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_67 = [reshape_76, parameter_234] + del parameter_234, reshape_76 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_198, einsum_199, einsum_200 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_67, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_67 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_264, + split_265, + ) = einsum_199 + del einsum_199 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_266, + split_267, + ) = einsum_200 + del einsum_200 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_198, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_198 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_96 = paddle._C_ops.add(dropout_86, layer_norm_57) + del dropout_86, layer_norm_57 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_96, parameter_228, parameter_227, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_96, parameter_227, parameter_228 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_60, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_97 = paddle._C_ops.add(matmul_64, parameter_223) + del matmul_64, parameter_223 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_10 = paddle._C_ops.relu(add_97) + del add_97 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_10 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_65 = paddle._C_ops.matmul(dropout_88, parameter_222, False, False) + del dropout_88, parameter_222 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_98 = paddle._C_ops.add(matmul_65, parameter_221) + del matmul_65, parameter_221 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_98, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_98 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_99 = paddle._C_ops.add(dropout_90, layer_norm_60) + del dropout_90, layer_norm_60 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_99, parameter_226, parameter_225, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_99, parameter_225, parameter_226 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_63, parameter_220, False, False) + del parameter_220 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(matmul_66, full_int_array_5) + del matmul_66 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_63, parameter_219, False, False) + del parameter_219 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(matmul_67, full_int_array_5) + del matmul_67 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_68 = paddle._C_ops.matmul(layer_norm_63, parameter_218, False, False) + del parameter_218 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(matmul_68, full_int_array_5) + del matmul_68 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(dropout_2, parameter_216, False, False) + del parameter_216 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_80 = paddle._C_ops.reshape(matmul_69, full_int_array_6) + del matmul_69 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_100 = paddle._C_ops.add(reshape_77, parameter_213) + del parameter_213 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_68 = [add_100, reshape_78] + del add_100, reshape_78 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_201, einsum_202, einsum_203 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_68, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_68 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_268, + split_269, + ) = einsum_202 + del einsum_202 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_270, + split_271, + ) = einsum_203 + del einsum_203 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_101 = paddle._C_ops.add(reshape_77, parameter_215) + del parameter_215 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_69 = [add_101, reshape_80] + del add_101, reshape_80 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_204, einsum_205, einsum_206 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_69, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_69 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_272, + split_273, + ) = einsum_205 + del einsum_205 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_274, + split_275, + ) = einsum_206 + del einsum_206 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(einsum_204, full_int_array_7) + del einsum_204 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + reshape_81, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_81 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(slice_11, full_int_array_9) + del slice_11 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_11 = paddle._C_ops.index_select(reshape_82, arange_2, 3) + del reshape_82 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_102 = paddle._C_ops.add(reshape_77, parameter_214) + del parameter_214, reshape_77 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_70 = [add_102, parameter_212] + del add_102, parameter_212 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_207, einsum_208, einsum_209 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_70, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_70 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_276, + split_277, + ) = einsum_208 + del einsum_208 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_278, + split_279, + ) = einsum_209 + del einsum_209 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_71 = [cast_5, einsum_207] + del einsum_207 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_210, einsum_211, einsum_212 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_71, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_71 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_280, + split_281, + ) = einsum_211 + del einsum_211 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_282, + split_283, + ) = einsum_212 + del einsum_212 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_103 = paddle._C_ops.add(einsum_201, index_select_11) + del einsum_201, index_select_11 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_104 = paddle._C_ops.add(add_103, einsum_210) + del add_103, einsum_210 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_15 = paddle._C_ops.scale(add_104, full_16, float("0"), True) + del add_104 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_11 = paddle._C_ops.subtract(scale_15, scale_4) + del scale_15 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_11 = paddle._C_ops.softmax(subtract_11, 3) + del subtract_11 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_72 = [dropout_92, reshape_79] + del dropout_92, reshape_79 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_213, einsum_214, einsum_215 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_72, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_72 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_284, + split_285, + ) = einsum_214 + del einsum_214 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_286, + split_287, + ) = einsum_215 + del einsum_215 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(einsum_213, full_int_array_10) + del einsum_213 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_73 = [reshape_83, parameter_217] + del parameter_217, reshape_83 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_216, einsum_217, einsum_218 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_73, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_73 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_288, + split_289, + ) = einsum_217 + del einsum_217 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_290, + split_291, + ) = einsum_218 + del einsum_218 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_216, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_216 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_105 = paddle._C_ops.add(dropout_94, layer_norm_63) + del dropout_94, layer_norm_63 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_105, parameter_211, parameter_210, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_105, parameter_210, parameter_211 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_66, parameter_207, False, False) + del parameter_207 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_106 = paddle._C_ops.add(matmul_70, parameter_206) + del matmul_70, parameter_206 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_11 = paddle._C_ops.relu(add_106) + del add_106 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_11 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(dropout_96, parameter_205, False, False) + del dropout_96, parameter_205 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_107 = paddle._C_ops.add(matmul_71, parameter_204) + del matmul_71, parameter_204 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_107, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_107 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_108 = paddle._C_ops.add(dropout_98, layer_norm_66) + del dropout_98, layer_norm_66 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_108, parameter_209, parameter_208, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_108, parameter_208, parameter_209 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_69, parameter_203, False, False) + del parameter_203 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_84 = paddle._C_ops.reshape(matmul_72, full_int_array_5) + del matmul_72 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_69, parameter_202, False, False) + del parameter_202 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_85 = paddle._C_ops.reshape(matmul_73, full_int_array_5) + del matmul_73 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_69, parameter_201, False, False) + del parameter_201 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_86 = paddle._C_ops.reshape(matmul_74, full_int_array_5) + del matmul_74 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_75 = paddle._C_ops.matmul(dropout_2, parameter_199, False, False) + del parameter_199 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(matmul_75, full_int_array_6) + del matmul_75 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_109 = paddle._C_ops.add(reshape_84, parameter_196) + del parameter_196 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_74 = [add_109, reshape_85] + del add_109, reshape_85 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_219, einsum_220, einsum_221 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_74, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_74 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_292, + split_293, + ) = einsum_220 + del einsum_220 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_294, + split_295, + ) = einsum_221 + del einsum_221 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_110 = paddle._C_ops.add(reshape_84, parameter_198) + del parameter_198 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_75 = [add_110, reshape_87] + del add_110, reshape_87 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_222, einsum_223, einsum_224 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_75, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_75 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_296, + split_297, + ) = einsum_223 + del einsum_223 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_298, + split_299, + ) = einsum_224 + del einsum_224 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(einsum_222, full_int_array_7) + del einsum_222 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + reshape_88, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_88 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_89 = paddle._C_ops.reshape(slice_12, full_int_array_9) + del slice_12 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_12 = paddle._C_ops.index_select(reshape_89, arange_2, 3) + del reshape_89 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_111 = paddle._C_ops.add(reshape_84, parameter_197) + del parameter_197, reshape_84 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_76 = [add_111, parameter_195] + del add_111, parameter_195 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_225, einsum_226, einsum_227 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_76, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_76 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_300, + split_301, + ) = einsum_226 + del einsum_226 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_302, + split_303, + ) = einsum_227 + del einsum_227 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_77 = [cast_5, einsum_225] + del einsum_225 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_228, einsum_229, einsum_230 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_77, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_77 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_304, + split_305, + ) = einsum_229 + del einsum_229 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_306, + split_307, + ) = einsum_230 + del einsum_230 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_112 = paddle._C_ops.add(einsum_219, index_select_12) + del einsum_219, index_select_12 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_113 = paddle._C_ops.add(add_112, einsum_228) + del add_112, einsum_228 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_16 = paddle._C_ops.scale(add_113, full_16, float("0"), True) + del add_113 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_12 = paddle._C_ops.subtract(scale_16, scale_4) + del scale_16 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_12 = paddle._C_ops.softmax(subtract_12, 3) + del subtract_12 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_12, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_12 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_78 = [dropout_100, reshape_86] + del dropout_100, reshape_86 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_231, einsum_232, einsum_233 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_78, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_78 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_308, + split_309, + ) = einsum_232 + del einsum_232 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_310, + split_311, + ) = einsum_233 + del einsum_233 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_90 = paddle._C_ops.reshape(einsum_231, full_int_array_10) + del einsum_231 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_79 = [reshape_90, parameter_200] + del parameter_200, reshape_90 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_234, einsum_235, einsum_236 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_79, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_79 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_312, + split_313, + ) = einsum_235 + del einsum_235 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_314, + split_315, + ) = einsum_236 + del einsum_236 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_102, dropout_103 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_234, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_234 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_114 = paddle._C_ops.add(dropout_102, layer_norm_69) + del dropout_102, layer_norm_69 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_114, parameter_194, parameter_193, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_114, parameter_193, parameter_194 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_76 = paddle._C_ops.matmul(layer_norm_72, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_115 = paddle._C_ops.add(matmul_76, parameter_189) + del matmul_76, parameter_189 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_12 = paddle._C_ops.relu(add_115) + del add_115 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_104, dropout_105 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_12, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_12 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_77 = paddle._C_ops.matmul(dropout_104, parameter_188, False, False) + del dropout_104, parameter_188 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_116 = paddle._C_ops.add(matmul_77, parameter_187) + del matmul_77, parameter_187 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_106, dropout_107 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_116, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_116 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_117 = paddle._C_ops.add(dropout_106, layer_norm_72) + del dropout_106, layer_norm_72 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_117, parameter_192, parameter_191, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_117, parameter_191, parameter_192 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_75, parameter_186, False, False) + del parameter_186 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(matmul_78, full_int_array_5) + del matmul_78 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_75, parameter_185, False, False) + del parameter_185 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(matmul_79, full_int_array_5) + del matmul_79 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_75, parameter_184, False, False) + del parameter_184 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_93 = paddle._C_ops.reshape(matmul_80, full_int_array_5) + del matmul_80 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(dropout_2, parameter_182, False, False) + del parameter_182 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(matmul_81, full_int_array_6) + del matmul_81 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_118 = paddle._C_ops.add(reshape_91, parameter_179) + del parameter_179 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_80 = [add_118, reshape_92] + del add_118, reshape_92 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_237, einsum_238, einsum_239 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_80, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_80 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_316, + split_317, + ) = einsum_238 + del einsum_238 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_318, + split_319, + ) = einsum_239 + del einsum_239 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_119 = paddle._C_ops.add(reshape_91, parameter_181) + del parameter_181 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_81 = [add_119, reshape_94] + del add_119, reshape_94 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_240, einsum_241, einsum_242 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_81, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_81 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_320, + split_321, + ) = einsum_241 + del einsum_241 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_322, + split_323, + ) = einsum_242 + del einsum_242 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(einsum_240, full_int_array_7) + del einsum_240 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + reshape_95, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_95 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(slice_13, full_int_array_9) + del slice_13 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_13 = paddle._C_ops.index_select(reshape_96, arange_2, 3) + del reshape_96 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_120 = paddle._C_ops.add(reshape_91, parameter_180) + del parameter_180, reshape_91 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_82 = [add_120, parameter_178] + del add_120, parameter_178 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_243, einsum_244, einsum_245 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_82, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_82 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_324, + split_325, + ) = einsum_244 + del einsum_244 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_326, + split_327, + ) = einsum_245 + del einsum_245 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_83 = [cast_5, einsum_243] + del einsum_243 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_246, einsum_247, einsum_248 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_83, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_83 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_328, + split_329, + ) = einsum_247 + del einsum_247 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_330, + split_331, + ) = einsum_248 + del einsum_248 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_121 = paddle._C_ops.add(einsum_237, index_select_13) + del einsum_237, index_select_13 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_122 = paddle._C_ops.add(add_121, einsum_246) + del add_121, einsum_246 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_17 = paddle._C_ops.scale(add_122, full_16, float("0"), True) + del add_122 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_13 = paddle._C_ops.subtract(scale_17, scale_4) + del scale_17 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_13 = paddle._C_ops.softmax(subtract_13, 3) + del subtract_13 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_108, dropout_109 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_13 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_84 = [dropout_108, reshape_93] + del dropout_108, reshape_93 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_249, einsum_250, einsum_251 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_84, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_84 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_332, + split_333, + ) = einsum_250 + del einsum_250 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_334, + split_335, + ) = einsum_251 + del einsum_251 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(einsum_249, full_int_array_10) + del einsum_249 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_85 = [reshape_97, parameter_183] + del parameter_183, reshape_97 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_252, einsum_253, einsum_254 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_85, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_85 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_336, + split_337, + ) = einsum_253 + del einsum_253 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_338, + split_339, + ) = einsum_254 + del einsum_254 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_110, dropout_111 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_252, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_252 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_123 = paddle._C_ops.add(dropout_110, layer_norm_75) + del dropout_110, layer_norm_75 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_177, parameter_176, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_123, parameter_176, parameter_177 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_78, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_124 = paddle._C_ops.add(matmul_82, parameter_172) + del matmul_82, parameter_172 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_13 = paddle._C_ops.relu(add_124) + del add_124 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_112, dropout_113 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_13 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_83 = paddle._C_ops.matmul(dropout_112, parameter_171, False, False) + del dropout_112, parameter_171 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_125 = paddle._C_ops.add(matmul_83, parameter_170) + del matmul_83, parameter_170 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_114, dropout_115 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_125, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_125 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_126 = paddle._C_ops.add(dropout_114, layer_norm_78) + del dropout_114, layer_norm_78 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_175, parameter_174, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_126, parameter_174, parameter_175 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_81, parameter_169, False, False) + del parameter_169 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(matmul_84, full_int_array_5) + del matmul_84 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_81, parameter_168, False, False) + del parameter_168 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(matmul_85, full_int_array_5) + del matmul_85 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_86 = paddle._C_ops.matmul(layer_norm_81, parameter_167, False, False) + del parameter_167 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_100 = paddle._C_ops.reshape(matmul_86, full_int_array_5) + del matmul_86 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_87 = paddle._C_ops.matmul(dropout_2, parameter_165, False, False) + del parameter_165 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_101 = paddle._C_ops.reshape(matmul_87, full_int_array_6) + del matmul_87 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_127 = paddle._C_ops.add(reshape_98, parameter_162) + del parameter_162 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_86 = [add_127, reshape_99] + del add_127, reshape_99 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_255, einsum_256, einsum_257 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_86, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_86 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_340, + split_341, + ) = einsum_256 + del einsum_256 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_342, + split_343, + ) = einsum_257 + del einsum_257 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_128 = paddle._C_ops.add(reshape_98, parameter_164) + del parameter_164 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_87 = [add_128, reshape_101] + del add_128, reshape_101 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_258, einsum_259, einsum_260 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_87, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_87 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_344, + split_345, + ) = einsum_259 + del einsum_259 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_346, + split_347, + ) = einsum_260 + del einsum_260 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(einsum_258, full_int_array_7) + del einsum_258 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + reshape_102, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_102 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_103 = paddle._C_ops.reshape(slice_14, full_int_array_9) + del slice_14 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_14 = paddle._C_ops.index_select(reshape_103, arange_2, 3) + del reshape_103 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_129 = paddle._C_ops.add(reshape_98, parameter_163) + del parameter_163, reshape_98 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_88 = [add_129, parameter_161] + del add_129, parameter_161 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_261, einsum_262, einsum_263 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_88, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_88 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_348, + split_349, + ) = einsum_262 + del einsum_262 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_350, + split_351, + ) = einsum_263 + del einsum_263 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_89 = [cast_5, einsum_261] + del einsum_261 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_264, einsum_265, einsum_266 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_89, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_89 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_352, + split_353, + ) = einsum_265 + del einsum_265 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_354, + split_355, + ) = einsum_266 + del einsum_266 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_130 = paddle._C_ops.add(einsum_255, index_select_14) + del einsum_255, index_select_14 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_131 = paddle._C_ops.add(add_130, einsum_264) + del add_130, einsum_264 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_18 = paddle._C_ops.scale(add_131, full_16, float("0"), True) + del add_131 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_14 = paddle._C_ops.subtract(scale_18, scale_4) + del scale_18 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_14 = paddle._C_ops.softmax(subtract_14, 3) + del subtract_14 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_116, dropout_117 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_14, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_14 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_90 = [dropout_116, reshape_100] + del dropout_116, reshape_100 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_267, einsum_268, einsum_269 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_90, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_90 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_356, + split_357, + ) = einsum_268 + del einsum_268 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_358, + split_359, + ) = einsum_269 + del einsum_269 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_104 = paddle._C_ops.reshape(einsum_267, full_int_array_10) + del einsum_267 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_91 = [reshape_104, parameter_166] + del parameter_166, reshape_104 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_270, einsum_271, einsum_272 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_91, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_91 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_360, + split_361, + ) = einsum_271 + del einsum_271 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_362, + split_363, + ) = einsum_272 + del einsum_272 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_118, dropout_119 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_270, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_270 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_132 = paddle._C_ops.add(dropout_118, layer_norm_81) + del dropout_118, layer_norm_81 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_160, parameter_159, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_132, parameter_159, parameter_160 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_88 = paddle._C_ops.matmul(layer_norm_84, parameter_156, False, False) + del parameter_156 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_133 = paddle._C_ops.add(matmul_88, parameter_155) + del matmul_88, parameter_155 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_14 = paddle._C_ops.relu(add_133) + del add_133 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_120, dropout_121 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_14, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_14 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_89 = paddle._C_ops.matmul(dropout_120, parameter_154, False, False) + del dropout_120, parameter_154 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_134 = paddle._C_ops.add(matmul_89, parameter_153) + del matmul_89, parameter_153 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_122, dropout_123 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_134, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_134 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_135 = paddle._C_ops.add(dropout_122, layer_norm_84) + del dropout_122, layer_norm_84 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_135, parameter_158, parameter_157, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_135, parameter_157, parameter_158 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_87, parameter_152, False, False) + del parameter_152 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(matmul_90, full_int_array_5) + del matmul_90 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_87, parameter_151, False, False) + del parameter_151 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(matmul_91, full_int_array_5) + del matmul_91 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_92 = paddle._C_ops.matmul(layer_norm_87, parameter_150, False, False) + del parameter_150 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(matmul_92, full_int_array_5) + del matmul_92 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_93 = paddle._C_ops.matmul(dropout_2, parameter_148, False, False) + del parameter_148 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(matmul_93, full_int_array_6) + del matmul_93 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_136 = paddle._C_ops.add(reshape_105, parameter_145) + del parameter_145 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_92 = [add_136, reshape_106] + del add_136, reshape_106 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_273, einsum_274, einsum_275 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_92, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_92 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_364, + split_365, + ) = einsum_274 + del einsum_274 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_366, + split_367, + ) = einsum_275 + del einsum_275 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_137 = paddle._C_ops.add(reshape_105, parameter_147) + del parameter_147 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_93 = [add_137, reshape_108] + del add_137, reshape_108 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_276, einsum_277, einsum_278 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_93, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_93 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_368, + split_369, + ) = einsum_277 + del einsum_277 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_370, + split_371, + ) = einsum_278 + del einsum_278 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_109 = paddle._C_ops.reshape(einsum_276, full_int_array_7) + del einsum_276 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + reshape_109, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_109 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_110 = paddle._C_ops.reshape(slice_15, full_int_array_9) + del slice_15 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_15 = paddle._C_ops.index_select(reshape_110, arange_2, 3) + del reshape_110 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_138 = paddle._C_ops.add(reshape_105, parameter_146) + del parameter_146, reshape_105 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_94 = [add_138, parameter_144] + del add_138, parameter_144 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_279, einsum_280, einsum_281 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_94, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_94 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_372, + split_373, + ) = einsum_280 + del einsum_280 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_374, + split_375, + ) = einsum_281 + del einsum_281 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_95 = [cast_5, einsum_279] + del einsum_279 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_282, einsum_283, einsum_284 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_95, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_95 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_376, + split_377, + ) = einsum_283 + del einsum_283 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_378, + split_379, + ) = einsum_284 + del einsum_284 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_139 = paddle._C_ops.add(einsum_273, index_select_15) + del einsum_273, index_select_15 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_140 = paddle._C_ops.add(add_139, einsum_282) + del add_139, einsum_282 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_19 = paddle._C_ops.scale(add_140, full_16, float("0"), True) + del add_140 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_15 = paddle._C_ops.subtract(scale_19, scale_4) + del scale_19 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_15 = paddle._C_ops.softmax(subtract_15, 3) + del subtract_15 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_124, dropout_125 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_15 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_96 = [dropout_124, reshape_107] + del dropout_124, reshape_107 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_285, einsum_286, einsum_287 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_96, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_96 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_380, + split_381, + ) = einsum_286 + del einsum_286 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_382, + split_383, + ) = einsum_287 + del einsum_287 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(einsum_285, full_int_array_10) + del einsum_285 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_97 = [reshape_111, parameter_149] + del parameter_149, reshape_111 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_288, einsum_289, einsum_290 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_97, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_97 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_384, + split_385, + ) = einsum_289 + del einsum_289 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_386, + split_387, + ) = einsum_290 + del einsum_290 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_126, dropout_127 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_288, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_288 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_141 = paddle._C_ops.add(dropout_126, layer_norm_87) + del dropout_126, layer_norm_87 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_141, parameter_143, parameter_142, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_141, parameter_142, parameter_143 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_94 = paddle._C_ops.matmul(layer_norm_90, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_142 = paddle._C_ops.add(matmul_94, parameter_138) + del matmul_94, parameter_138 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_15 = paddle._C_ops.relu(add_142) + del add_142 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_128, dropout_129 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_15 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_95 = paddle._C_ops.matmul(dropout_128, parameter_137, False, False) + del dropout_128, parameter_137 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_143 = paddle._C_ops.add(matmul_95, parameter_136) + del matmul_95, parameter_136 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_130, dropout_131 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_143, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_143 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_144 = paddle._C_ops.add(dropout_130, layer_norm_90) + del dropout_130, layer_norm_90 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_144, parameter_141, parameter_140, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_144, parameter_140, parameter_141 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_93, parameter_135, False, False) + del parameter_135 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(matmul_96, full_int_array_5) + del matmul_96 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(layer_norm_93, parameter_134, False, False) + del parameter_134 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(matmul_97, full_int_array_5) + del matmul_97 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_98 = paddle._C_ops.matmul(layer_norm_93, parameter_133, False, False) + del parameter_133 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_114 = paddle._C_ops.reshape(matmul_98, full_int_array_5) + del matmul_98 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_99 = paddle._C_ops.matmul(dropout_2, parameter_131, False, False) + del parameter_131 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(matmul_99, full_int_array_6) + del matmul_99 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_145 = paddle._C_ops.add(reshape_112, parameter_128) + del parameter_128 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_98 = [add_145, reshape_113] + del add_145, reshape_113 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_291, einsum_292, einsum_293 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_98, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_98 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_388, + split_389, + ) = einsum_292 + del einsum_292 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_390, + split_391, + ) = einsum_293 + del einsum_293 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_146 = paddle._C_ops.add(reshape_112, parameter_130) + del parameter_130 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_99 = [add_146, reshape_115] + del add_146, reshape_115 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_294, einsum_295, einsum_296 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_99, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_99 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_392, + split_393, + ) = einsum_295 + del einsum_295 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_394, + split_395, + ) = einsum_296 + del einsum_296 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(einsum_294, full_int_array_7) + del einsum_294 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + reshape_116, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_116 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(slice_16, full_int_array_9) + del slice_16 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_16 = paddle._C_ops.index_select(reshape_117, arange_2, 3) + del reshape_117 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_147 = paddle._C_ops.add(reshape_112, parameter_129) + del parameter_129, reshape_112 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_100 = [add_147, parameter_127] + del add_147, parameter_127 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_297, einsum_298, einsum_299 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_100, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_100 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_396, + split_397, + ) = einsum_298 + del einsum_298 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_398, + split_399, + ) = einsum_299 + del einsum_299 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_101 = [cast_5, einsum_297] + del einsum_297 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_300, einsum_301, einsum_302 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_101, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_101 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_400, + split_401, + ) = einsum_301 + del einsum_301 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_402, + split_403, + ) = einsum_302 + del einsum_302 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_148 = paddle._C_ops.add(einsum_291, index_select_16) + del einsum_291, index_select_16 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_149 = paddle._C_ops.add(add_148, einsum_300) + del add_148, einsum_300 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_20 = paddle._C_ops.scale(add_149, full_16, float("0"), True) + del add_149 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_16 = paddle._C_ops.subtract(scale_20, scale_4) + del scale_20 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_16 = paddle._C_ops.softmax(subtract_16, 3) + del subtract_16 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_132, dropout_133 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_16, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_16 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_102 = [dropout_132, reshape_114] + del dropout_132, reshape_114 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_303, einsum_304, einsum_305 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_102, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_102 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_404, + split_405, + ) = einsum_304 + del einsum_304 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_406, + split_407, + ) = einsum_305 + del einsum_305 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(einsum_303, full_int_array_10) + del einsum_303 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_103 = [reshape_118, parameter_132] + del parameter_132, reshape_118 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_306, einsum_307, einsum_308 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_103, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_103 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_408, + split_409, + ) = einsum_307 + del einsum_307 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_410, + split_411, + ) = einsum_308 + del einsum_308 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_134, dropout_135 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_306, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_306 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_150 = paddle._C_ops.add(dropout_134, layer_norm_93) + del dropout_134, layer_norm_93 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_150, parameter_126, parameter_125, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_150, parameter_125, parameter_126 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_96, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_151 = paddle._C_ops.add(matmul_100, parameter_121) + del matmul_100, parameter_121 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_16 = paddle._C_ops.relu(add_151) + del add_151 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_136, dropout_137 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_16, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_16 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_101 = paddle._C_ops.matmul(dropout_136, parameter_120, False, False) + del dropout_136, parameter_120 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_152 = paddle._C_ops.add(matmul_101, parameter_119) + del matmul_101, parameter_119 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_138, dropout_139 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_152, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_152 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_153 = paddle._C_ops.add(dropout_138, layer_norm_96) + del dropout_138, layer_norm_96 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_153, parameter_124, parameter_123, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_153, parameter_123, parameter_124 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_99, parameter_118, False, False) + del parameter_118 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(matmul_102, full_int_array_5) + del matmul_102 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_103 = paddle._C_ops.matmul(layer_norm_99, parameter_117, False, False) + del parameter_117 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_120 = paddle._C_ops.reshape(matmul_103, full_int_array_5) + del matmul_103 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_104 = paddle._C_ops.matmul(layer_norm_99, parameter_116, False, False) + del parameter_116 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(matmul_104, full_int_array_5) + del matmul_104 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_105 = paddle._C_ops.matmul(dropout_2, parameter_114, False, False) + del parameter_114 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(matmul_105, full_int_array_6) + del matmul_105 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_154 = paddle._C_ops.add(reshape_119, parameter_111) + del parameter_111 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_104 = [add_154, reshape_120] + del add_154, reshape_120 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_309, einsum_310, einsum_311 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_104, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_104 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_412, + split_413, + ) = einsum_310 + del einsum_310 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_414, + split_415, + ) = einsum_311 + del einsum_311 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_155 = paddle._C_ops.add(reshape_119, parameter_113) + del parameter_113 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_105 = [add_155, reshape_122] + del add_155, reshape_122 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_312, einsum_313, einsum_314 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_105, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_105 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_416, + split_417, + ) = einsum_313 + del einsum_313 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_418, + split_419, + ) = einsum_314 + del einsum_314 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(einsum_312, full_int_array_7) + del einsum_312 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + reshape_123, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_123 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(slice_17, full_int_array_9) + del slice_17 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_17 = paddle._C_ops.index_select(reshape_124, arange_2, 3) + del reshape_124 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_156 = paddle._C_ops.add(reshape_119, parameter_112) + del parameter_112, reshape_119 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_106 = [add_156, parameter_110] + del add_156, parameter_110 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_315, einsum_316, einsum_317 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_106, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_106 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_420, + split_421, + ) = einsum_316 + del einsum_316 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_422, + split_423, + ) = einsum_317 + del einsum_317 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_107 = [cast_5, einsum_315] + del einsum_315 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_318, einsum_319, einsum_320 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_107, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_107 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_424, + split_425, + ) = einsum_319 + del einsum_319 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_426, + split_427, + ) = einsum_320 + del einsum_320 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_157 = paddle._C_ops.add(einsum_309, index_select_17) + del einsum_309, index_select_17 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_158 = paddle._C_ops.add(add_157, einsum_318) + del add_157, einsum_318 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_21 = paddle._C_ops.scale(add_158, full_16, float("0"), True) + del add_158 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_17 = paddle._C_ops.subtract(scale_21, scale_4) + del scale_21 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_17 = paddle._C_ops.softmax(subtract_17, 3) + del subtract_17 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_140, dropout_141 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_17 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_108 = [dropout_140, reshape_121] + del dropout_140, reshape_121 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_321, einsum_322, einsum_323 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_108, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_108 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_428, + split_429, + ) = einsum_322 + del einsum_322 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_430, + split_431, + ) = einsum_323 + del einsum_323 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_125 = paddle._C_ops.reshape(einsum_321, full_int_array_10) + del einsum_321 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_109 = [reshape_125, parameter_115] + del parameter_115, reshape_125 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_324, einsum_325, einsum_326 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_109, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_109 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_432, + split_433, + ) = einsum_325 + del einsum_325 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_434, + split_435, + ) = einsum_326 + del einsum_326 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_142, dropout_143 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_324, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_324 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_159 = paddle._C_ops.add(dropout_142, layer_norm_99) + del dropout_142, layer_norm_99 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_159, parameter_109, parameter_108, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_159, parameter_108, parameter_109 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_106 = paddle._C_ops.matmul(layer_norm_102, parameter_105, False, False) + del parameter_105 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_160 = paddle._C_ops.add(matmul_106, parameter_104) + del matmul_106, parameter_104 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_17 = paddle._C_ops.relu(add_160) + del add_160 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_144, dropout_145 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_17 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_107 = paddle._C_ops.matmul(dropout_144, parameter_103, False, False) + del dropout_144, parameter_103 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_161 = paddle._C_ops.add(matmul_107, parameter_102) + del matmul_107, parameter_102 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_146, dropout_147 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_161, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_161 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_162 = paddle._C_ops.add(dropout_146, layer_norm_102) + del dropout_146, layer_norm_102 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_162, parameter_107, parameter_106, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_162, parameter_106, parameter_107 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_108 = paddle._C_ops.matmul(layer_norm_105, parameter_101, False, False) + del parameter_101 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_126 = paddle._C_ops.reshape(matmul_108, full_int_array_5) + del matmul_108 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_109 = paddle._C_ops.matmul(layer_norm_105, parameter_100, False, False) + del parameter_100 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_127 = paddle._C_ops.reshape(matmul_109, full_int_array_5) + del matmul_109 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_105, parameter_99, False, False) + del parameter_99 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_128 = paddle._C_ops.reshape(matmul_110, full_int_array_5) + del matmul_110 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_111 = paddle._C_ops.matmul(dropout_2, parameter_97, False, False) + del parameter_97 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(matmul_111, full_int_array_6) + del matmul_111 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_163 = paddle._C_ops.add(reshape_126, parameter_94) + del parameter_94 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_110 = [add_163, reshape_127] + del add_163, reshape_127 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_327, einsum_328, einsum_329 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_110, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_110 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_436, + split_437, + ) = einsum_328 + del einsum_328 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_438, + split_439, + ) = einsum_329 + del einsum_329 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_164 = paddle._C_ops.add(reshape_126, parameter_96) + del parameter_96 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_111 = [add_164, reshape_129] + del add_164, reshape_129 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_330, einsum_331, einsum_332 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_111, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_111 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_440, + split_441, + ) = einsum_331 + del einsum_331 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_442, + split_443, + ) = einsum_332 + del einsum_332 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(einsum_330, full_int_array_7) + del einsum_330 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + reshape_130, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_130 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(slice_18, full_int_array_9) + del slice_18 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_18 = paddle._C_ops.index_select(reshape_131, arange_2, 3) + del reshape_131 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_165 = paddle._C_ops.add(reshape_126, parameter_95) + del parameter_95, reshape_126 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_112 = [add_165, parameter_93] + del add_165, parameter_93 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_333, einsum_334, einsum_335 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_112, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_112 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_444, + split_445, + ) = einsum_334 + del einsum_334 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_446, + split_447, + ) = einsum_335 + del einsum_335 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_113 = [cast_5, einsum_333] + del einsum_333 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_336, einsum_337, einsum_338 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_113, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_113 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_448, + split_449, + ) = einsum_337 + del einsum_337 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_450, + split_451, + ) = einsum_338 + del einsum_338 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_166 = paddle._C_ops.add(einsum_327, index_select_18) + del einsum_327, index_select_18 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_167 = paddle._C_ops.add(add_166, einsum_336) + del add_166, einsum_336 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_22 = paddle._C_ops.scale(add_167, full_16, float("0"), True) + del add_167 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_18 = paddle._C_ops.subtract(scale_22, scale_4) + del scale_22 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_18 = paddle._C_ops.softmax(subtract_18, 3) + del subtract_18 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_148, dropout_149 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_18 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_114 = [dropout_148, reshape_128] + del dropout_148, reshape_128 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_339, einsum_340, einsum_341 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_114, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_114 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_452, + split_453, + ) = einsum_340 + del einsum_340 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_454, + split_455, + ) = einsum_341 + del einsum_341 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_132 = paddle._C_ops.reshape(einsum_339, full_int_array_10) + del einsum_339 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_115 = [reshape_132, parameter_98] + del parameter_98, reshape_132 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_342, einsum_343, einsum_344 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_115, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_115 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_456, + split_457, + ) = einsum_343 + del einsum_343 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_458, + split_459, + ) = einsum_344 + del einsum_344 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_150, dropout_151 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_342, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_342 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_168 = paddle._C_ops.add(dropout_150, layer_norm_105) + del dropout_150, layer_norm_105 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_92, parameter_91, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_168, parameter_91, parameter_92 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_108, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_169 = paddle._C_ops.add(matmul_112, parameter_87) + del matmul_112, parameter_87 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_18 = paddle._C_ops.relu(add_169) + del add_169 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_152, dropout_153 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_18 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_113 = paddle._C_ops.matmul(dropout_152, parameter_86, False, False) + del dropout_152, parameter_86 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_170 = paddle._C_ops.add(matmul_113, parameter_85) + del matmul_113, parameter_85 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_154, dropout_155 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_170, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_170 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_171 = paddle._C_ops.add(dropout_154, layer_norm_108) + del dropout_154, layer_norm_108 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_171, parameter_90, parameter_89, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_171, parameter_89, parameter_90 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_111, parameter_84, False, False) + del parameter_84 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(matmul_114, full_int_array_5) + del matmul_114 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_115 = paddle._C_ops.matmul(layer_norm_111, parameter_83, False, False) + del parameter_83 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_134 = paddle._C_ops.reshape(matmul_115, full_int_array_5) + del matmul_115 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_111, parameter_82, False, False) + del parameter_82 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(matmul_116, full_int_array_5) + del matmul_116 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_117 = paddle._C_ops.matmul(dropout_2, parameter_80, False, False) + del parameter_80 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_136 = paddle._C_ops.reshape(matmul_117, full_int_array_6) + del matmul_117 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_172 = paddle._C_ops.add(reshape_133, parameter_77) + del parameter_77 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_116 = [add_172, reshape_134] + del add_172, reshape_134 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_345, einsum_346, einsum_347 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_116, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_116 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_460, + split_461, + ) = einsum_346 + del einsum_346 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_462, + split_463, + ) = einsum_347 + del einsum_347 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_173 = paddle._C_ops.add(reshape_133, parameter_79) + del parameter_79 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_117 = [add_173, reshape_136] + del add_173, reshape_136 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_348, einsum_349, einsum_350 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_117, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_117 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_464, + split_465, + ) = einsum_349 + del einsum_349 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_466, + split_467, + ) = einsum_350 + del einsum_350 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(einsum_348, full_int_array_7) + del einsum_348 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + reshape_137, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_137 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(slice_19, full_int_array_9) + del slice_19 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_19 = paddle._C_ops.index_select(reshape_138, arange_2, 3) + del reshape_138 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_174 = paddle._C_ops.add(reshape_133, parameter_78) + del parameter_78, reshape_133 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_118 = [add_174, parameter_76] + del add_174, parameter_76 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_351, einsum_352, einsum_353 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_118, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_118 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_468, + split_469, + ) = einsum_352 + del einsum_352 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_470, + split_471, + ) = einsum_353 + del einsum_353 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_119 = [cast_5, einsum_351] + del einsum_351 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_354, einsum_355, einsum_356 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_119, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_119 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_472, + split_473, + ) = einsum_355 + del einsum_355 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_474, + split_475, + ) = einsum_356 + del einsum_356 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_175 = paddle._C_ops.add(einsum_345, index_select_19) + del einsum_345, index_select_19 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_176 = paddle._C_ops.add(add_175, einsum_354) + del add_175, einsum_354 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_23 = paddle._C_ops.scale(add_176, full_16, float("0"), True) + del add_176 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_19 = paddle._C_ops.subtract(scale_23, scale_4) + del scale_23 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_19 = paddle._C_ops.softmax(subtract_19, 3) + del subtract_19 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_156, dropout_157 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_19, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_19 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_120 = [dropout_156, reshape_135] + del dropout_156, reshape_135 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_357, einsum_358, einsum_359 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_120, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_120 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_476, + split_477, + ) = einsum_358 + del einsum_358 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_478, + split_479, + ) = einsum_359 + del einsum_359 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_139 = paddle._C_ops.reshape(einsum_357, full_int_array_10) + del einsum_357 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_121 = [reshape_139, parameter_81] + del parameter_81, reshape_139 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_360, einsum_361, einsum_362 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_121, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_121 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_480, + split_481, + ) = einsum_361 + del einsum_361 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_482, + split_483, + ) = einsum_362 + del einsum_362 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_158, dropout_159 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_360, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_360 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_177 = paddle._C_ops.add(dropout_158, layer_norm_111) + del dropout_158, layer_norm_111 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_177, parameter_75, parameter_74, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_177, parameter_74, parameter_75 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_118 = paddle._C_ops.matmul(layer_norm_114, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_178 = paddle._C_ops.add(matmul_118, parameter_70) + del matmul_118, parameter_70 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_19 = paddle._C_ops.relu(add_178) + del add_178 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_160, dropout_161 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_19, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_19 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_119 = paddle._C_ops.matmul(dropout_160, parameter_69, False, False) + del dropout_160, parameter_69 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_179 = paddle._C_ops.add(matmul_119, parameter_68) + del matmul_119, parameter_68 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_162, dropout_163 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_179, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_179 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_180 = paddle._C_ops.add(dropout_162, layer_norm_114) + del dropout_162, layer_norm_114 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_180, parameter_73, parameter_72, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_180, parameter_72, parameter_73 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_117, parameter_67, False, False) + del parameter_67 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(matmul_120, full_int_array_5) + del matmul_120 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_117, parameter_66, False, False) + del parameter_66 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(matmul_121, full_int_array_5) + del matmul_121 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_122 = paddle._C_ops.matmul(layer_norm_117, parameter_65, False, False) + del parameter_65 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(matmul_122, full_int_array_5) + del matmul_122 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_123 = paddle._C_ops.matmul(dropout_2, parameter_63, False, False) + del parameter_63 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_143 = paddle._C_ops.reshape(matmul_123, full_int_array_6) + del matmul_123 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_181 = paddle._C_ops.add(reshape_140, parameter_60) + del parameter_60 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_122 = [add_181, reshape_141] + del add_181, reshape_141 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_363, einsum_364, einsum_365 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_122, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_122 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_484, + split_485, + ) = einsum_364 + del einsum_364 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_486, + split_487, + ) = einsum_365 + del einsum_365 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_182 = paddle._C_ops.add(reshape_140, parameter_62) + del parameter_62 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_123 = [add_182, reshape_143] + del add_182, reshape_143 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_366, einsum_367, einsum_368 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_123, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_123 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_488, + split_489, + ) = einsum_367 + del einsum_367 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_490, + split_491, + ) = einsum_368 + del einsum_368 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_144 = paddle._C_ops.reshape(einsum_366, full_int_array_7) + del einsum_366 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + reshape_144, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_144 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_145 = paddle._C_ops.reshape(slice_20, full_int_array_9) + del slice_20 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_20 = paddle._C_ops.index_select(reshape_145, arange_2, 3) + del reshape_145 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_183 = paddle._C_ops.add(reshape_140, parameter_61) + del parameter_61, reshape_140 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_124 = [add_183, parameter_59] + del add_183, parameter_59 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_369, einsum_370, einsum_371 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_124, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_124 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_492, + split_493, + ) = einsum_370 + del einsum_370 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_494, + split_495, + ) = einsum_371 + del einsum_371 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_125 = [cast_5, einsum_369] + del einsum_369 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_372, einsum_373, einsum_374 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_125, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_125 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_496, + split_497, + ) = einsum_373 + del einsum_373 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_498, + split_499, + ) = einsum_374 + del einsum_374 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_184 = paddle._C_ops.add(einsum_363, index_select_20) + del einsum_363, index_select_20 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_185 = paddle._C_ops.add(add_184, einsum_372) + del add_184, einsum_372 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_24 = paddle._C_ops.scale(add_185, full_16, float("0"), True) + del add_185 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_20 = paddle._C_ops.subtract(scale_24, scale_4) + del scale_24 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_20 = paddle._C_ops.softmax(subtract_20, 3) + del subtract_20 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_164, dropout_165 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_20, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_20 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_126 = [dropout_164, reshape_142] + del dropout_164, reshape_142 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_375, einsum_376, einsum_377 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_126, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_126 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_500, + split_501, + ) = einsum_376 + del einsum_376 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_502, + split_503, + ) = einsum_377 + del einsum_377 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_146 = paddle._C_ops.reshape(einsum_375, full_int_array_10) + del einsum_375 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_127 = [reshape_146, parameter_64] + del parameter_64, reshape_146 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_378, einsum_379, einsum_380 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_127, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_127 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_504, + split_505, + ) = einsum_379 + del einsum_379 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_506, + split_507, + ) = einsum_380 + del einsum_380 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_166, dropout_167 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_378, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_378 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_186 = paddle._C_ops.add(dropout_166, layer_norm_117) + del dropout_166, layer_norm_117 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_186, parameter_58, parameter_57, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_186, parameter_57, parameter_58 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_124 = paddle._C_ops.matmul(layer_norm_120, parameter_54, False, False) + del parameter_54 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_187 = paddle._C_ops.add(matmul_124, parameter_53) + del matmul_124, parameter_53 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_20 = paddle._C_ops.relu(add_187) + del add_187 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_168, dropout_169 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_20, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_20 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_125 = paddle._C_ops.matmul(dropout_168, parameter_52, False, False) + del dropout_168, parameter_52 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_188 = paddle._C_ops.add(matmul_125, parameter_51) + del matmul_125, parameter_51 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_170, dropout_171 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_188, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_188 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_189 = paddle._C_ops.add(dropout_170, layer_norm_120) + del dropout_170, layer_norm_120 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_189, parameter_56, parameter_55, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_189, parameter_55, parameter_56 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_123, parameter_50, False, False) + del parameter_50 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(matmul_126, full_int_array_5) + del matmul_126 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_127 = paddle._C_ops.matmul(layer_norm_123, parameter_49, False, False) + del parameter_49 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(matmul_127, full_int_array_5) + del matmul_127 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_128 = paddle._C_ops.matmul(layer_norm_123, parameter_48, False, False) + del parameter_48 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(matmul_128, full_int_array_5) + del matmul_128 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_129 = paddle._C_ops.matmul(dropout_2, parameter_46, False, False) + del parameter_46 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(matmul_129, full_int_array_6) + del matmul_129 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_190 = paddle._C_ops.add(reshape_147, parameter_43) + del parameter_43 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_128 = [add_190, reshape_148] + del add_190, reshape_148 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_381, einsum_382, einsum_383 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_128, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_128 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_508, + split_509, + ) = einsum_382 + del einsum_382 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_510, + split_511, + ) = einsum_383 + del einsum_383 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_191 = paddle._C_ops.add(reshape_147, parameter_45) + del parameter_45 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_129 = [add_191, reshape_150] + del add_191, reshape_150 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_384, einsum_385, einsum_386 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_129, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_129 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_512, + split_513, + ) = einsum_385 + del einsum_385 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_514, + split_515, + ) = einsum_386 + del einsum_386 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_151 = paddle._C_ops.reshape(einsum_384, full_int_array_7) + del einsum_384 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + reshape_151, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_151 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(slice_21, full_int_array_9) + del slice_21 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_21 = paddle._C_ops.index_select(reshape_152, arange_2, 3) + del reshape_152 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_192 = paddle._C_ops.add(reshape_147, parameter_44) + del parameter_44, reshape_147 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_130 = [add_192, parameter_42] + del add_192, parameter_42 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_387, einsum_388, einsum_389 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_130, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_130 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_516, + split_517, + ) = einsum_388 + del einsum_388 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_518, + split_519, + ) = einsum_389 + del einsum_389 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_131 = [cast_5, einsum_387] + del einsum_387 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_390, einsum_391, einsum_392 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_131, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_131 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_520, + split_521, + ) = einsum_391 + del einsum_391 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_522, + split_523, + ) = einsum_392 + del einsum_392 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_193 = paddle._C_ops.add(einsum_381, index_select_21) + del einsum_381, index_select_21 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_194 = paddle._C_ops.add(add_193, einsum_390) + del add_193, einsum_390 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_25 = paddle._C_ops.scale(add_194, full_16, float("0"), True) + del add_194 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_21 = paddle._C_ops.subtract(scale_25, scale_4) + del scale_25 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_21 = paddle._C_ops.softmax(subtract_21, 3) + del subtract_21 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_172, dropout_173 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_21 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_132 = [dropout_172, reshape_149] + del dropout_172, reshape_149 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_393, einsum_394, einsum_395 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_132, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_132 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_524, + split_525, + ) = einsum_394 + del einsum_394 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_526, + split_527, + ) = einsum_395 + del einsum_395 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_153 = paddle._C_ops.reshape(einsum_393, full_int_array_10) + del einsum_393 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_133 = [reshape_153, parameter_47] + del parameter_47, reshape_153 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_396, einsum_397, einsum_398 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_133, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_133 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_528, + split_529, + ) = einsum_397 + del einsum_397 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_530, + split_531, + ) = einsum_398 + del einsum_398 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_174, dropout_175 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_396, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_396 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_195 = paddle._C_ops.add(dropout_174, layer_norm_123) + del dropout_174, layer_norm_123 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_195, parameter_41, parameter_40, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_195, parameter_40, parameter_41 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_130 = paddle._C_ops.matmul(layer_norm_126, parameter_37, False, False) + del parameter_37 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_196 = paddle._C_ops.add(matmul_130, parameter_36) + del matmul_130, parameter_36 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_21 = paddle._C_ops.relu(add_196) + del add_196 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_176, dropout_177 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_21 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_131 = paddle._C_ops.matmul(dropout_176, parameter_35, False, False) + del dropout_176, parameter_35 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_197 = paddle._C_ops.add(matmul_131, parameter_34) + del matmul_131, parameter_34 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_178, dropout_179 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_197, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_197 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_198 = paddle._C_ops.add(dropout_178, layer_norm_126) + del dropout_178, layer_norm_126 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_198, parameter_39, parameter_38, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_198, parameter_38, parameter_39 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_132 = paddle._C_ops.matmul(layer_norm_129, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_154 = paddle._C_ops.reshape(matmul_132, full_int_array_5) + del matmul_132 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_133 = paddle._C_ops.matmul(layer_norm_129, parameter_32, False, False) + del parameter_32 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(matmul_133, full_int_array_5) + del matmul_133 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_129, parameter_31, False, False) + del parameter_31 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(matmul_134, full_int_array_5) + del matmul_134 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_135 = paddle._C_ops.matmul(dropout_2, parameter_29, False, False) + del parameter_29 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_157 = paddle._C_ops.reshape(matmul_135, full_int_array_6) + del matmul_135 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_199 = paddle._C_ops.add(reshape_154, parameter_26) + del parameter_26 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_134 = [add_199, reshape_155] + del add_199, reshape_155 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_399, einsum_400, einsum_401 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_134, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_134 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_532, + split_533, + ) = einsum_400 + del einsum_400 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_534, + split_535, + ) = einsum_401 + del einsum_401 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_200 = paddle._C_ops.add(reshape_154, parameter_28) + del parameter_28 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_135 = [add_200, reshape_157] + del add_200, reshape_157 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_402, einsum_403, einsum_404 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_135, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_135 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_536, + split_537, + ) = einsum_403 + del einsum_403 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_538, + split_539, + ) = einsum_404 + del einsum_404 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(einsum_402, full_int_array_7) + del einsum_402 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + reshape_158, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_158 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(slice_22, full_int_array_9) + del slice_22 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_22 = paddle._C_ops.index_select(reshape_159, arange_2, 3) + del reshape_159 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_201 = paddle._C_ops.add(reshape_154, parameter_27) + del parameter_27, reshape_154 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_136 = [add_201, parameter_25] + del add_201, parameter_25 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_405, einsum_406, einsum_407 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_136, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_136 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_540, + split_541, + ) = einsum_406 + del einsum_406 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_542, + split_543, + ) = einsum_407 + del einsum_407 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_137 = [cast_5, einsum_405] + del einsum_405 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_408, einsum_409, einsum_410 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_137, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_137 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_544, + split_545, + ) = einsum_409 + del einsum_409 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_546, + split_547, + ) = einsum_410 + del einsum_410 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_202 = paddle._C_ops.add(einsum_399, index_select_22) + del einsum_399, index_select_22 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_203 = paddle._C_ops.add(add_202, einsum_408) + del add_202, einsum_408 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_26 = paddle._C_ops.scale(add_203, full_16, float("0"), True) + del add_203 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_22 = paddle._C_ops.subtract(scale_26, scale_4) + del scale_26 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_22 = paddle._C_ops.softmax(subtract_22, 3) + del subtract_22 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_180, dropout_181 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_22, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_22 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_138 = [dropout_180, reshape_156] + del dropout_180, reshape_156 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_411, einsum_412, einsum_413 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_138, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_138 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_548, + split_549, + ) = einsum_412 + del einsum_412 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_550, + split_551, + ) = einsum_413 + del einsum_413 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(einsum_411, full_int_array_10) + del einsum_411 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_139 = [reshape_160, parameter_30] + del parameter_30, reshape_160 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_414, einsum_415, einsum_416 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_139, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_139 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_552, + split_553, + ) = einsum_415 + del einsum_415 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_554, + split_555, + ) = einsum_416 + del einsum_416 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_182, dropout_183 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_414, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_414 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_204 = paddle._C_ops.add(dropout_182, layer_norm_129) + del dropout_182, layer_norm_129 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_204, parameter_24, parameter_23, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_204, parameter_23, parameter_24 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_136 = paddle._C_ops.matmul(layer_norm_132, parameter_20, False, False) + del parameter_20 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_205 = paddle._C_ops.add(matmul_136, parameter_19) + del matmul_136, parameter_19 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_22 = paddle._C_ops.relu(add_205) + del add_205 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_184, dropout_185 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_22, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_22 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_137 = paddle._C_ops.matmul(dropout_184, parameter_18, False, False) + del dropout_184, parameter_18 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_206 = paddle._C_ops.add(matmul_137, parameter_17) + del matmul_137, parameter_17 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_186, dropout_187 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_206, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_206 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_207 = paddle._C_ops.add(dropout_186, layer_norm_132) + del dropout_186, layer_norm_132 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_207, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_207, parameter_21, parameter_22 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_138 = paddle._C_ops.matmul(layer_norm_135, parameter_16, False, False) + del parameter_16 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_161 = paddle._C_ops.reshape(matmul_138, full_int_array_5) + del matmul_138 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_139 = paddle._C_ops.matmul(layer_norm_135, parameter_15, False, False) + del parameter_15 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_162 = paddle._C_ops.reshape(matmul_139, full_int_array_5) + del matmul_139 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x768xf32, 768x768xf32) + matmul_140 = paddle._C_ops.matmul(layer_norm_135, parameter_14, False, False) + del parameter_14 + + # pd_op.reshape: (9x1x12x64xf32) <- (9x1x768xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(matmul_140, full_int_array_5) + del full_int_array_5, matmul_140 + + # pd_op.matmul: (18x1x768xf32) <- (18x1x768xf32, 768x768xf32) + matmul_141 = paddle._C_ops.matmul(dropout_2, parameter_12, False, False) + del dropout_2, parameter_12 + + # pd_op.reshape: (18x1x12x64xf32) <- (18x1x768xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(matmul_141, full_int_array_6) + del full_int_array_6, matmul_141 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_208 = paddle._C_ops.add(reshape_161, parameter_9) + del parameter_9 + + # builtin.combine: ([9x1x12x64xf32, 9x1x12x64xf32]) <- (9x1x12x64xf32, 9x1x12x64xf32) + combine_140 = [add_208, reshape_162] + del add_208, reshape_162 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x1x12x64xf32, 9x1x12x64xf32]) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + einsum_417, einsum_418, einsum_419 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_140, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_140 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_556, + split_557, + ) = einsum_418 + del einsum_418 + + # builtin.split: (9x1x12x64xf32, 9x1x12x64xf32) <- ([9x1x12x64xf32, 9x1x12x64xf32]) + ( + split_558, + split_559, + ) = einsum_419 + del einsum_419 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_209 = paddle._C_ops.add(reshape_161, parameter_11) + del parameter_11 + + # builtin.combine: ([9x1x12x64xf32, 18x1x12x64xf32]) <- (9x1x12x64xf32, 18x1x12x64xf32) + combine_141 = [add_209, reshape_164] + del add_209, reshape_164 + + # pd_op.einsum: (1x12x9x18xf32, [0xf32, 0xf32], [9x1x12x64xf32, 18x1x12x64xf32]) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + einsum_420, einsum_421, einsum_422 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_141, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_141 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_560, + split_561, + ) = einsum_421 + del einsum_421 + + # builtin.split: (9x1x12x64xf32, 18x1x12x64xf32) <- ([9x1x12x64xf32, 18x1x12x64xf32]) + ( + split_562, + split_563, + ) = einsum_422 + del einsum_422 + + # pd_op.reshape: (1x12x18x9xf32) <- (1x12x9x18xf32, 4xi64) + reshape_165 = paddle._C_ops.reshape(einsum_420, full_int_array_7) + del einsum_420, full_int_array_7 + + # pd_op.slice: (1x12x17x9xf32) <- (1x12x18x9xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + reshape_165, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del full_int_array_3, full_int_array_8, reshape_165 + + # pd_op.reshape: (1x12x9x17xf32) <- (1x12x17x9xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(slice_23, full_int_array_9) + del full_int_array_9, slice_23 + + # pd_op.index_select: (1x12x9x9xf32) <- (1x12x9x17xf32, 9xi64) + index_select_23 = paddle._C_ops.index_select(reshape_166, arange_2, 3) + del arange_2, reshape_166 + + # pd_op.add: (9x1x12x64xf32) <- (9x1x12x64xf32, 12x64xf32) + add_210 = paddle._C_ops.add(reshape_161, parameter_10) + del parameter_10, reshape_161 + + # builtin.combine: ([9x1x12x64xf32, 2x12x64xf32]) <- (9x1x12x64xf32, 2x12x64xf32) + combine_142 = [add_210, parameter_8] + del add_210, parameter_8 + + # pd_op.einsum: (9x1x12x2xf32, [0xf32, 0xf32], [9x1x12x64xf32, 2x12x64xf32]) <- ([9x1x12x64xf32, 2x12x64xf32]) + einsum_423, einsum_424, einsum_425 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_142, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_142 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_564, + split_565, + ) = einsum_424 + del einsum_424 + + # builtin.split: (9x1x12x64xf32, 2x12x64xf32) <- ([9x1x12x64xf32, 2x12x64xf32]) + ( + split_566, + split_567, + ) = einsum_425 + del einsum_425 + + # builtin.combine: ([9x9x1x2xf32, 9x1x12x2xf32]) <- (9x9x1x2xf32, 9x1x12x2xf32) + combine_143 = [cast_5, einsum_423] + del cast_5, einsum_423 + + # pd_op.einsum: (1x12x9x9xf32, [0xf32, 0xf32], [9x9x1x2xf32, 9x1x12x2xf32]) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + einsum_426, einsum_427, einsum_428 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_143, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_143 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_568, + split_569, + ) = einsum_427 + del einsum_427 + + # builtin.split: (9x9x1x2xf32, 9x1x12x2xf32) <- ([9x9x1x2xf32, 9x1x12x2xf32]) + ( + split_570, + split_571, + ) = einsum_428 + del einsum_428 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_211 = paddle._C_ops.add(einsum_417, index_select_23) + del einsum_417, index_select_23 + + # pd_op.add: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x12x9x9xf32) + add_212 = paddle._C_ops.add(add_211, einsum_426) + del add_211, einsum_426 + + # pd_op.scale: (1x12x9x9xf32) <- (1x12x9x9xf32, 1xf32) + scale_27 = paddle._C_ops.scale(add_212, full_16, float("0"), True) + del add_212, full_16 + + # pd_op.subtract: (1x12x9x9xf32) <- (1x12x9x9xf32, 1x1x9x9xf32) + subtract_23 = paddle._C_ops.subtract(scale_27, scale_4) + del scale_27, scale_4 + + # pd_op.softmax: (1x12x9x9xf32) <- (1x12x9x9xf32) + softmax_23 = paddle._C_ops.softmax(subtract_23, 3) + del subtract_23 + + # pd_op.dropout: (1x12x9x9xf32, 1x12x9x9xui8) <- (1x12x9x9xf32, None, 1xf32) + dropout_188, dropout_189 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_23 + + # builtin.combine: ([1x12x9x9xf32, 9x1x12x64xf32]) <- (1x12x9x9xf32, 9x1x12x64xf32) + combine_144 = [dropout_188, reshape_163] + del dropout_188, reshape_163 + + # pd_op.einsum: (9x1x12x64xf32, [0xf32, 0xf32], [1x12x9x9xf32, 9x1x12x64xf32]) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + einsum_429, einsum_430, einsum_431 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_144, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_144 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_572, + split_573, + ) = einsum_430 + del einsum_430 + + # builtin.split: (1x12x9x9xf32, 9x1x12x64xf32) <- ([1x12x9x9xf32, 9x1x12x64xf32]) + ( + split_574, + split_575, + ) = einsum_431 + del einsum_431 + + # pd_op.reshape: (9x1x768xf32) <- (9x1x12x64xf32, 3xi64) + reshape_167 = paddle._C_ops.reshape(einsum_429, full_int_array_10) + del einsum_429, full_int_array_10 + + # builtin.combine: ([9x1x768xf32, 768x768xf32]) <- (9x1x768xf32, 768x768xf32) + combine_145 = [reshape_167, parameter_13] + del parameter_13, reshape_167 + + # pd_op.einsum: (9x1x768xf32, [0xf32, 0xf32], [9x1x768xf32, 768x768xf32]) <- ([9x1x768xf32, 768x768xf32]) + einsum_432, einsum_433, einsum_434 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_145, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_145 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_576, + split_577, + ) = einsum_433 + del einsum_433 + + # builtin.split: (9x1x768xf32, 768x768xf32) <- ([9x1x768xf32, 768x768xf32]) + ( + split_578, + split_579, + ) = einsum_434 + del einsum_434 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_190, dropout_191 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_432, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_432 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_213 = paddle._C_ops.add(dropout_190, layer_norm_135) + del dropout_190, layer_norm_135 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_213, parameter_7, parameter_6, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_213, parameter_6, parameter_7 + + # pd_op.matmul: (9x1x3072xf32) <- (9x1x768xf32, 768x3072xf32) + matmul_142 = paddle._C_ops.matmul(layer_norm_138, parameter_3, False, False) + del parameter_3 + + # pd_op.add: (9x1x3072xf32) <- (9x1x3072xf32, 3072xf32) + add_214 = paddle._C_ops.add(matmul_142, parameter_2) + del matmul_142, parameter_2 + + # pd_op.relu: (9x1x3072xf32) <- (9x1x3072xf32) + relu_23 = paddle._C_ops.relu(add_214) + del add_214 + + # pd_op.dropout: (9x1x3072xf32, 9x1x3072xui8) <- (9x1x3072xf32, None, 1xf32) + dropout_192, dropout_193 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_23 + + # pd_op.matmul: (9x1x768xf32) <- (9x1x3072xf32, 3072x768xf32) + matmul_143 = paddle._C_ops.matmul(dropout_192, parameter_1, False, False) + del dropout_192, parameter_1 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 768xf32) + add_215 = paddle._C_ops.add(matmul_143, parameter_0) + del matmul_143, parameter_0 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_194, dropout_195 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_215, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_215 + + # pd_op.add: (9x1x768xf32) <- (9x1x768xf32, 9x1x768xf32) + add_216 = paddle._C_ops.add(dropout_194, layer_norm_138) + del dropout_194, layer_norm_138 + + # pd_op.layer_norm: (9x1x768xf32, 9x1xf32, 9x1xf32) <- (9x1x768xf32, 768xf32, 768xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_216, parameter_5, parameter_4, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_216, parameter_4, parameter_5 + + # pd_op.dropout: (9x1x768xf32, 9x1x768xui8) <- (9x1x768xf32, None, 1xf32) + dropout_196, dropout_197 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_141, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_3, layer_norm_141 + + # pd_op.transpose: (1x9x768xf32) <- (9x1x768xf32) + transpose_0 = paddle._C_ops.transpose(dropout_196, [1, 0, 2]) + del dropout_196 + + return transpose_0 diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-mid/weight_meta.py b/paddle_samples/PaddleNLP/chinese-xlnet-mid/weight_meta.py new file mode 100644 index 000000000..150aa4ab9 --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-mid/weight_meta.py @@ -0,0 +1,4076 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0992385") + max_val = float("0.102875") + mean = float("-2.87104e-05") + std = float("0.019993") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.104082") + max_val = float("0.0932895") + mean = float("-3.06962e-05") + std = float("0.0200036") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0685599") + max_val = float("0.0658963") + mean = float("-0.00024417") + std = float("0.0204489") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0651445") + max_val = float("0.0592371") + mean = float("-2.21256e-05") + std = float("0.019764") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0556366") + max_val = float("0.0566928") + mean = float("0.000764188") + std = float("0.0203508") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0525137") + max_val = float("0.06786") + mean = float("0.000539884") + std = float("0.0197949") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.087482") + max_val = float("0.0960763") + mean = float("2.87498e-05") + std = float("0.0199794") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0949354") + max_val = float("0.0935933") + mean = float("-6.02944e-06") + std = float("0.0200094") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916653") + max_val = float("0.1009") + mean = float("8.5081e-08") + std = float("0.0200007") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.103925") + max_val = float("0.110142") + mean = float("1.3423e-05") + std = float("0.0200011") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0951989") + max_val = float("0.0954426") + mean = float("1.36197e-05") + std = float("0.0200253") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0996241") + max_val = float("0.102408") + mean = float("7.37253e-06") + std = float("0.020012") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0932743") + max_val = float("0.0952639") + mean = float("-5.89792e-06") + std = float("0.0199952") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0613574") + max_val = float("0.0706353") + mean = float("0.000535646") + std = float("0.0201446") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0598611") + max_val = float("0.0500983") + mean = float("-0.00102849") + std = float("0.0188322") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0727003") + max_val = float("0.0524254") + mean = float("-1.29274e-05") + std = float("0.0203901") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0786873") + max_val = float("0.0639895") + mean = float("-0.000650006") + std = float("0.0200197") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0899222") + max_val = float("0.102717") + mean = float("-1.20835e-05") + std = float("0.0199781") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0949745") + max_val = float("0.0943316") + mean = float("1.70752e-05") + std = float("0.0199991") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.091913") + max_val = float("0.0885693") + mean = float("1.49961e-05") + std = float("0.0200166") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.10528") + max_val = float("0.101579") + mean = float("-4.47538e-05") + std = float("0.0199851") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0964461") + max_val = float("0.0988549") + mean = float("-4.61212e-05") + std = float("0.0199858") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104177") + max_val = float("0.100672") + mean = float("1.29213e-05") + std = float("0.0200155") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.103687") + max_val = float("0.0960466") + mean = float("-3.52816e-06") + std = float("0.0200105") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0791351") + max_val = float("0.0603129") + mean = float("0.000312459") + std = float("0.0208899") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0616493") + max_val = float("0.074826") + mean = float("0.00114984") + std = float("0.0216077") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0519985") + max_val = float("0.0661832") + mean = float("-0.0010894") + std = float("0.0196826") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0660265") + max_val = float("0.0629053") + mean = float("0.000248182") + std = float("0.0198376") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0989029") + max_val = float("0.0909414") + mean = float("9.17577e-06") + std = float("0.0199848") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0923946") + max_val = float("0.0960927") + mean = float("-3.97571e-05") + std = float("0.0200341") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.104238") + max_val = float("0.0966158") + mean = float("2.18959e-05") + std = float("0.0200147") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.095079") + max_val = float("0.0929343") + mean = float("5.30684e-05") + std = float("0.0200075") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.094298") + max_val = float("0.100605") + mean = float("9.54803e-06") + std = float("0.0199927") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0964315") + max_val = float("0.0995373") + mean = float("-1.751e-05") + std = float("0.0200114") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0963421") + max_val = float("0.104976") + mean = float("-1.665e-05") + std = float("0.0199913") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0641725") + max_val = float("0.0668724") + mean = float("-0.000221256") + std = float("0.0201518") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.058031") + max_val = float("0.0713647") + mean = float("0.000548997") + std = float("0.0198842") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0533915") + max_val = float("0.0730677") + mean = float("-0.0011631") + std = float("0.0215013") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0613344") + max_val = float("0.0760659") + mean = float("-0.0004222") + std = float("0.0204239") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101617") + max_val = float("0.0938434") + mean = float("2.8609e-05") + std = float("0.0199775") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0954409") + max_val = float("0.091044") + mean = float("1.51252e-05") + std = float("0.0199946") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0960642") + max_val = float("0.0992712") + mean = float("-3.47097e-05") + std = float("0.0200068") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0978587") + max_val = float("0.0919599") + mean = float("-3.31101e-06") + std = float("0.0199954") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0969195") + max_val = float("0.100116") + mean = float("-9.60172e-06") + std = float("0.0200226") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0996224") + max_val = float("0.104063") + mean = float("-8.36708e-06") + std = float("0.0199974") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.100492") + max_val = float("0.110091") + mean = float("-2.8787e-05") + std = float("0.0200235") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0772095") + max_val = float("0.0721144") + mean = float("0.000643863") + std = float("0.0204347") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0637782") + max_val = float("0.0591677") + mean = float("0.000285496") + std = float("0.0197946") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0705864") + max_val = float("0.0700757") + mean = float("0.000367466") + std = float("0.021182") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0582186") + max_val = float("0.0636393") + mean = float("0.000220073") + std = float("0.0199499") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0923404") + max_val = float("0.0956992") + mean = float("3.59996e-05") + std = float("0.0199696") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0900342") + max_val = float("0.0959493") + mean = float("-3.62642e-06") + std = float("0.020046") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100684") + max_val = float("0.0980237") + mean = float("2.45752e-05") + std = float("0.0199999") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0884012") + max_val = float("0.101642") + mean = float("1.70642e-06") + std = float("0.0200474") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0967449") + max_val = float("0.0925632") + mean = float("-3.19217e-05") + std = float("0.0199837") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0980523") + max_val = float("0.0972812") + mean = float("1.10406e-06") + std = float("0.0199914") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.102435") + max_val = float("0.092993") + mean = float("1.72811e-05") + std = float("0.0199927") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0604049") + max_val = float("0.066894") + mean = float("0.000595404") + std = float("0.0204146") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0614719") + max_val = float("0.0783159") + mean = float("6.12256e-05") + std = float("0.0208108") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0741644") + max_val = float("0.067392") + mean = float("0.00107734") + std = float("0.0202505") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0629797") + max_val = float("0.062391") + mean = float("0.000485195") + std = float("0.0197896") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.088169") + max_val = float("0.102024") + mean = float("3.27016e-05") + std = float("0.0199833") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0988853") + max_val = float("0.0935851") + mean = float("-1.99768e-05") + std = float("0.0200103") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0930175") + max_val = float("0.0980104") + mean = float("8.8307e-06") + std = float("0.0199685") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101789") + max_val = float("0.0885144") + mean = float("-2.49534e-05") + std = float("0.0200085") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0991208") + max_val = float("0.0927385") + mean = float("-1.51644e-05") + std = float("0.0200208") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.106835") + max_val = float("0.0931437") + mean = float("-1.74689e-05") + std = float("0.0200132") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.105098") + max_val = float("0.102486") + mean = float("2.18372e-05") + std = float("0.0200075") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0547854") + max_val = float("0.0617458") + mean = float("-0.000391915") + std = float("0.0198446") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0608988") + max_val = float("0.0550923") + mean = float("-0.000148594") + std = float("0.019672") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0556389") + max_val = float("0.0547488") + mean = float("-0.000872696") + std = float("0.0192708") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0621615") + max_val = float("0.0675371") + mean = float("0.00106426") + std = float("0.0195946") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0986293") + max_val = float("0.0882185") + mean = float("4.02172e-05") + std = float("0.0200042") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916852") + max_val = float("0.107538") + mean = float("8.26962e-07") + std = float("0.0199815") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0972519") + max_val = float("0.0972278") + mean = float("7.19291e-06") + std = float("0.019994") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0909221") + max_val = float("0.0950249") + mean = float("2.08483e-05") + std = float("0.0199805") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100268") + max_val = float("0.0911824") + mean = float("5.09453e-07") + std = float("0.0199752") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0976225") + max_val = float("0.102656") + mean = float("-1.5584e-06") + std = float("0.0200065") + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0999746") + max_val = float("0.108195") + mean = float("1.10189e-05") + std = float("0.0200027") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0665081") + max_val = float("0.0582528") + mean = float("0.000132215") + std = float("0.0197637") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.07594") + max_val = float("0.0596196") + mean = float("0.000565413") + std = float("0.0200123") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0611587") + max_val = float("0.0609616") + mean = float("0.00161991") + std = float("0.0208715") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0701204") + max_val = float("0.0499476") + mean = float("-0.00112387") + std = float("0.0196437") + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.103737") + max_val = float("0.092225") + mean = float("1.58704e-05") + std = float("0.0199673") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0978492") + max_val = float("0.11001") + mean = float("-1.83536e-05") + std = float("0.0200326") + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0957246") + max_val = float("0.0955106") + mean = float("-7.83377e-06") + std = float("0.0200067") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101451") + max_val = float("0.0968324") + mean = float("7.90246e-06") + std = float("0.0200042") + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0982089") + max_val = float("0.0935521") + mean = float("-1.45475e-05") + std = float("0.020008") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0976562") + max_val = float("0.103721") + mean = float("5.02149e-06") + std = float("0.019994") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0998944") + max_val = float("0.0967743") + mean = float("-1.02369e-05") + std = float("0.0200073") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0688756") + max_val = float("0.0572315") + mean = float("0.000214926") + std = float("0.0201615") + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0580335") + max_val = float("0.0685004") + mean = float("-0.000466845") + std = float("0.0198299") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0569562") + max_val = float("0.0720309") + mean = float("-0.000175533") + std = float("0.0193007") + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0580931") + max_val = float("0.0665461") + mean = float("0.000556311") + std = float("0.0207681") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0911821") + max_val = float("0.0995433") + mean = float("-3.68664e-05") + std = float("0.020012") + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916099") + max_val = float("0.092629") + mean = float("-6.98546e-05") + std = float("0.0199792") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0930813") + max_val = float("0.0881732") + mean = float("-1.58426e-05") + std = float("0.0199799") + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0982105") + max_val = float("0.100704") + mean = float("-1.72037e-05") + std = float("0.0199786") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0915965") + max_val = float("0.0910249") + mean = float("-1.76717e-05") + std = float("0.019996") + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0986691") + max_val = float("0.0982143") + mean = float("-5.27154e-06") + std = float("0.0200064") + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0996236") + max_val = float("0.104955") + mean = float("-5.87859e-06") + std = float("0.0200043") + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0555636") + max_val = float("0.0782154") + mean = float("0.000150856") + std = float("0.0193841") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.071413") + max_val = float("0.0603286") + mean = float("0.000112784") + std = float("0.020672") + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0584897") + max_val = float("0.0601304") + mean = float("-0.000590898") + std = float("0.0200303") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0555021") + max_val = float("0.0644254") + mean = float("3.81104e-05") + std = float("0.0193433") + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.11498") + max_val = float("0.0938218") + mean = float("-3.18497e-05") + std = float("0.0199834") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0924328") + max_val = float("0.0910954") + mean = float("2.18907e-05") + std = float("0.0199983") + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0949582") + max_val = float("0.090847") + mean = float("2.31445e-05") + std = float("0.0199713") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0951275") + max_val = float("0.0979711") + mean = float("-3.66977e-06") + std = float("0.0200251") + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.10068") + max_val = float("0.0961136") + mean = float("-1.67871e-05") + std = float("0.0199725") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.10016") + max_val = float("0.105534") + mean = float("-1.36326e-06") + std = float("0.0200011") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.10346") + max_val = float("0.10523") + mean = float("-7.98683e-06") + std = float("0.0199884") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0911722") + max_val = float("0.0610926") + mean = float("-0.0007564") + std = float("0.0203989") + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0605921") + max_val = float("0.0597681") + mean = float("-0.00103852") + std = float("0.0196262") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0705175") + max_val = float("0.0598262") + mean = float("0.000798028") + std = float("0.0199337") + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.087768") + max_val = float("0.0571883") + mean = float("-2.54134e-05") + std = float("0.0206943") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0866027") + max_val = float("0.0895184") + mean = float("-1.29514e-05") + std = float("0.0200018") + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0960835") + max_val = float("0.0997401") + mean = float("-3.28011e-05") + std = float("0.0200195") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0935858") + max_val = float("0.096516") + mean = float("-2.14898e-05") + std = float("0.0200093") + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0941115") + max_val = float("0.093538") + mean = float("1.04981e-05") + std = float("0.0200066") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0968532") + max_val = float("0.100836") + mean = float("-2.9778e-06") + std = float("0.0199773") + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0981239") + max_val = float("0.0954288") + mean = float("-3.92532e-06") + std = float("0.0200006") + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101791") + max_val = float("0.0980343") + mean = float("1.28602e-06") + std = float("0.0200032") + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.065808") + max_val = float("0.0693584") + mean = float("0.000553734") + std = float("0.0205792") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.066239") + max_val = float("0.0623056") + mean = float("-0.000268497") + std = float("0.0201592") + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0632628") + max_val = float("0.0560296") + mean = float("0.000155703") + std = float("0.0197991") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0657599") + max_val = float("0.0579888") + mean = float("0.000647882") + std = float("0.0196466") + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0975053") + max_val = float("0.105776") + mean = float("1.88585e-06") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0982419") + max_val = float("0.0904129") + mean = float("3.3203e-07") + std = float("0.019978") + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0888827") + max_val = float("0.0927375") + mean = float("4.33929e-06") + std = float("0.0200373") + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0893663") + max_val = float("0.0907593") + mean = float("-3.60806e-05") + std = float("0.0199947") + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102359") + max_val = float("0.0893301") + mean = float("-2.53916e-05") + std = float("0.0199954") + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0988273") + max_val = float("0.100349") + mean = float("3.3034e-06") + std = float("0.0199979") + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0940568") + max_val = float("0.0932684") + mean = float("-1.35575e-05") + std = float("0.0199953") + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0678443") + max_val = float("0.0617379") + mean = float("-0.000463799") + std = float("0.0197932") + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0669562") + max_val = float("0.0779093") + mean = float("0.000177708") + std = float("0.0197129") + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0635541") + max_val = float("0.055896") + mean = float("0.000457738") + std = float("0.0206174") + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0667683") + max_val = float("0.0568582") + mean = float("0.00164259") + std = float("0.0200097") + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0894739") + max_val = float("0.0899545") + mean = float("1.34708e-05") + std = float("0.0200055") + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0920965") + max_val = float("0.0973643") + mean = float("-2.36284e-05") + std = float("0.0199965") + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0883198") + max_val = float("0.09633") + mean = float("-2.10673e-05") + std = float("0.0199832") + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0941603") + max_val = float("0.101693") + mean = float("-1.7868e-05") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.098849") + max_val = float("0.100235") + mean = float("6.90275e-06") + std = float("0.0200029") + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0939877") + max_val = float("0.100145") + mean = float("4.96509e-06") + std = float("0.0200054") + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0976788") + max_val = float("0.0981202") + mean = float("2.95791e-06") + std = float("0.0199983") + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0716485") + max_val = float("0.0704555") + mean = float("-0.000306408") + std = float("0.0195271") + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0631485") + max_val = float("0.0608671") + mean = float("-0.000783707") + std = float("0.020623") + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0612836") + max_val = float("0.0664824") + mean = float("0.00169708") + std = float("0.0201459") + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0617977") + max_val = float("0.0757013") + mean = float("-0.000942041") + std = float("0.020341") + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0959046") + max_val = float("0.0905154") + mean = float("1.3832e-06") + std = float("0.0199986") + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0897725") + max_val = float("0.0903606") + mean = float("4.65935e-05") + std = float("0.0200113") + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0944441") + max_val = float("0.0876855") + mean = float("-6.82516e-06") + std = float("0.0199964") + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0988587") + max_val = float("0.0928436") + mean = float("-1.03861e-05") + std = float("0.0199971") + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0938641") + max_val = float("0.0884428") + mean = float("-7.12503e-05") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104088") + max_val = float("0.100674") + mean = float("-1.2354e-05") + std = float("0.0200057") + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0940645") + max_val = float("0.0992474") + mean = float("-5.84661e-06") + std = float("0.0200021") + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0733826") + max_val = float("0.079843") + mean = float("0.000584072") + std = float("0.0198898") + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0745035") + max_val = float("0.0678794") + mean = float("-0.000124665") + std = float("0.0214412") + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0676443") + max_val = float("0.0639757") + mean = float("6.72338e-05") + std = float("0.0196634") + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0600977") + max_val = float("0.0674271") + mean = float("-0.00155526") + std = float("0.0202025") + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0895744") + max_val = float("0.0918048") + mean = float("1.12035e-05") + std = float("0.020037") + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0942934") + max_val = float("0.0938409") + mean = float("-1.89081e-05") + std = float("0.0199621") + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0951055") + max_val = float("0.0909101") + mean = float("-1.28031e-05") + std = float("0.0200067") + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0920752") + max_val = float("0.0936272") + mean = float("-2.56825e-05") + std = float("0.020006") + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0959548") + max_val = float("0.104327") + mean = float("3.32833e-05") + std = float("0.0199962") + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.111898") + max_val = float("0.0971337") + mean = float("-2.79149e-06") + std = float("0.0199909") + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.102543") + max_val = float("0.103272") + mean = float("-6.29099e-06") + std = float("0.0199911") + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0662811") + max_val = float("0.0793151") + mean = float("0.000483764") + std = float("0.0204926") + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0720237") + max_val = float("0.052449") + mean = float("-0.000461651") + std = float("0.0193515") + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0613431") + max_val = float("0.0557613") + mean = float("-0.000658027") + std = float("0.0211547") + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0708073") + max_val = float("0.0662909") + mean = float("0.000246101") + std = float("0.0199279") + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0930493") + max_val = float("0.089579") + mean = float("1.37958e-05") + std = float("0.0199762") + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0937447") + max_val = float("0.0937204") + mean = float("-6.0748e-06") + std = float("0.0200273") + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0956679") + max_val = float("0.0934901") + mean = float("-1.70272e-05") + std = float("0.0200205") + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100066") + max_val = float("0.0950881") + mean = float("2.65998e-05") + std = float("0.020013") + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0982124") + max_val = float("0.0920513") + mean = float("-3.49801e-05") + std = float("0.020012") + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104792") + max_val = float("0.104386") + mean = float("-4.51588e-06") + std = float("0.0199906") + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0990749") + max_val = float("0.101253") + mean = float("1.19078e-05") + std = float("0.0199982") + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.068423") + max_val = float("0.0730132") + mean = float("-0.000575068") + std = float("0.019558") + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0573987") + max_val = float("0.0538829") + mean = float("-0.00022042") + std = float("0.0199234") + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0614962") + max_val = float("0.0730006") + mean = float("-0.00100148") + std = float("0.0199159") + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0621623") + max_val = float("0.0578604") + mean = float("0.00118948") + std = float("0.0205245") + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0905907") + max_val = float("0.0886716") + mean = float("-1.19075e-05") + std = float("0.0199876") + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0940388") + max_val = float("0.0943725") + mean = float("-6.69905e-05") + std = float("0.0199879") + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0986769") + max_val = float("0.0952609") + mean = float("-6.63148e-06") + std = float("0.0199987") + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0968987") + max_val = float("0.0907544") + mean = float("-4.93519e-06") + std = float("0.0199667") + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101947") + max_val = float("0.0935621") + mean = float("5.38941e-05") + std = float("0.0200096") + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.116381") + max_val = float("0.0972943") + mean = float("-1.31732e-05") + std = float("0.0200032") + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0968845") + max_val = float("0.104371") + mean = float("-1.48792e-05") + std = float("0.0200016") + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0608554") + max_val = float("0.0718633") + mean = float("-0.000365229") + std = float("0.019887") + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0620168") + max_val = float("0.0497455") + mean = float("-0.00156908") + std = float("0.0200533") + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0796537") + max_val = float("0.0544007") + mean = float("0.000334917") + std = float("0.0197958") + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0624352") + max_val = float("0.0535833") + mean = float("-0.000144914") + std = float("0.0197108") + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0979601") + max_val = float("0.0979061") + mean = float("2.01737e-05") + std = float("0.0200193") + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0933974") + max_val = float("0.0889042") + mean = float("-2.23701e-05") + std = float("0.0199779") + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0922369") + max_val = float("0.0956675") + mean = float("-2.1542e-05") + std = float("0.0200272") + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.094925") + max_val = float("0.0936901") + mean = float("-2.6579e-05") + std = float("0.0200015") + data = None + + +class Program_weight_tensor_parameter_305: + name = "parameter_305" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0972304") + max_val = float("0.0887948") + mean = float("4.11008e-05") + std = float("0.0199814") + data = None + + +class Program_weight_tensor_parameter_306: + name = "parameter_306" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_307: + name = "parameter_307" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.102771") + max_val = float("0.1207") + mean = float("2.85613e-06") + std = float("0.02") + data = None + + +class Program_weight_tensor_parameter_308: + name = "parameter_308" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_309: + name = "parameter_309" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.10016") + max_val = float("0.0966208") + mean = float("1.10829e-05") + std = float("0.0200145") + data = None + + +class Program_weight_tensor_parameter_310: + name = "parameter_310" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_311: + name = "parameter_311" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_312: + name = "parameter_312" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_313: + name = "parameter_313" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_314: + name = "parameter_314" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0639653") + max_val = float("0.0644856") + mean = float("0.000140734") + std = float("0.0204312") + data = None + + +class Program_weight_tensor_parameter_315: + name = "parameter_315" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0630646") + max_val = float("0.0496056") + mean = float("-0.000972034") + std = float("0.0195664") + data = None + + +class Program_weight_tensor_parameter_316: + name = "parameter_316" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0683311") + max_val = float("0.060249") + mean = float("0.000509113") + std = float("0.0204194") + data = None + + +class Program_weight_tensor_parameter_317: + name = "parameter_317" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0867553") + max_val = float("0.0595023") + mean = float("-0.000186983") + std = float("0.0202237") + data = None + + +class Program_weight_tensor_parameter_318: + name = "parameter_318" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0949215") + max_val = float("0.0929923") + mean = float("-1.48893e-05") + std = float("0.0200072") + data = None + + +class Program_weight_tensor_parameter_319: + name = "parameter_319" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0915239") + max_val = float("0.094362") + mean = float("9.13134e-06") + std = float("0.0199928") + data = None + + +class Program_weight_tensor_parameter_320: + name = "parameter_320" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0939734") + max_val = float("0.0930242") + mean = float("-3.54406e-05") + std = float("0.0200039") + data = None + + +class Program_weight_tensor_parameter_321: + name = "parameter_321" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102151") + max_val = float("0.0971843") + mean = float("1.4659e-05") + std = float("0.0199957") + data = None + + +class Program_weight_tensor_parameter_322: + name = "parameter_322" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.096152") + max_val = float("0.0939488") + mean = float("4.81302e-06") + std = float("0.0199878") + data = None + + +class Program_weight_tensor_parameter_323: + name = "parameter_323" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_324: + name = "parameter_324" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104435") + max_val = float("0.0944035") + mean = float("-6.43081e-06") + std = float("0.0199896") + data = None + + +class Program_weight_tensor_parameter_325: + name = "parameter_325" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_326: + name = "parameter_326" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.100915") + max_val = float("0.103063") + mean = float("-1.38518e-05") + std = float("0.0199982") + data = None + + +class Program_weight_tensor_parameter_327: + name = "parameter_327" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_328: + name = "parameter_328" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_329: + name = "parameter_329" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_330: + name = "parameter_330" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_331: + name = "parameter_331" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0655106") + max_val = float("0.0793115") + mean = float("-0.000177196") + std = float("0.0200876") + data = None + + +class Program_weight_tensor_parameter_332: + name = "parameter_332" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0649945") + max_val = float("0.0765837") + mean = float("-0.00035018") + std = float("0.0208553") + data = None + + +class Program_weight_tensor_parameter_333: + name = "parameter_333" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0724614") + max_val = float("0.0697428") + mean = float("-0.000303265") + std = float("0.0198988") + data = None + + +class Program_weight_tensor_parameter_334: + name = "parameter_334" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0579749") + max_val = float("0.0723804") + mean = float("-0.000108606") + std = float("0.0206081") + data = None + + +class Program_weight_tensor_parameter_335: + name = "parameter_335" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0966144") + max_val = float("0.0956888") + mean = float("4.70503e-05") + std = float("0.0199741") + data = None + + +class Program_weight_tensor_parameter_336: + name = "parameter_336" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0944042") + max_val = float("0.0962674") + mean = float("-4.50011e-05") + std = float("0.0199971") + data = None + + +class Program_weight_tensor_parameter_337: + name = "parameter_337" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0971175") + max_val = float("0.0999373") + mean = float("7.93104e-06") + std = float("0.0199647") + data = None + + +class Program_weight_tensor_parameter_338: + name = "parameter_338" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0895577") + max_val = float("0.0978528") + mean = float("6.31164e-06") + std = float("0.0200072") + data = None + + +class Program_weight_tensor_parameter_339: + name = "parameter_339" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0939715") + max_val = float("0.0922678") + mean = float("1.26627e-05") + std = float("0.019993") + data = None + + +class Program_weight_tensor_parameter_340: + name = "parameter_340" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_341: + name = "parameter_341" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0952799") + max_val = float("0.103177") + mean = float("-1.78093e-07") + std = float("0.0199958") + data = None + + +class Program_weight_tensor_parameter_342: + name = "parameter_342" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_343: + name = "parameter_343" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.107197") + max_val = float("0.0964814") + mean = float("1.33163e-05") + std = float("0.0200002") + data = None + + +class Program_weight_tensor_parameter_344: + name = "parameter_344" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_345: + name = "parameter_345" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_346: + name = "parameter_346" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_347: + name = "parameter_347" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_348: + name = "parameter_348" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0651132") + max_val = float("0.0669098") + mean = float("0.000271728") + std = float("0.0200293") + data = None + + +class Program_weight_tensor_parameter_349: + name = "parameter_349" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0566132") + max_val = float("0.0661084") + mean = float("0.000778474") + std = float("0.0201461") + data = None + + +class Program_weight_tensor_parameter_350: + name = "parameter_350" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0573333") + max_val = float("0.0605919") + mean = float("-0.000275255") + std = float("0.0200007") + data = None + + +class Program_weight_tensor_parameter_351: + name = "parameter_351" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0586884") + max_val = float("0.0657262") + mean = float("-0.000724771") + std = float("0.020373") + data = None + + +class Program_weight_tensor_parameter_352: + name = "parameter_352" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0955559") + max_val = float("0.0866505") + mean = float("-1.0279e-05") + std = float("0.0199847") + data = None + + +class Program_weight_tensor_parameter_353: + name = "parameter_353" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.092631") + max_val = float("0.0932036") + mean = float("-1.70493e-05") + std = float("0.0199895") + data = None + + +class Program_weight_tensor_parameter_354: + name = "parameter_354" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102376") + max_val = float("0.0951306") + mean = float("-1.46204e-05") + std = float("0.0199796") + data = None + + +class Program_weight_tensor_parameter_355: + name = "parameter_355" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.106232") + max_val = float("0.0874891") + mean = float("-1.33059e-05") + std = float("0.0199711") + data = None + + +class Program_weight_tensor_parameter_356: + name = "parameter_356" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101956") + max_val = float("0.0935847") + mean = float("-4.34596e-06") + std = float("0.0199674") + data = None + + +class Program_weight_tensor_parameter_357: + name = "parameter_357" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_358: + name = "parameter_358" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0944472") + max_val = float("0.10012") + mean = float("9.15296e-06") + std = float("0.0199866") + data = None + + +class Program_weight_tensor_parameter_359: + name = "parameter_359" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_360: + name = "parameter_360" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.111544") + max_val = float("0.104288") + mean = float("-8.35623e-06") + std = float("0.0199907") + data = None + + +class Program_weight_tensor_parameter_361: + name = "parameter_361" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_362: + name = "parameter_362" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_363: + name = "parameter_363" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_364: + name = "parameter_364" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_365: + name = "parameter_365" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0598522") + max_val = float("0.0604418") + mean = float("0.000259007") + std = float("0.0199941") + data = None + + +class Program_weight_tensor_parameter_366: + name = "parameter_366" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0635184") + max_val = float("0.0538524") + mean = float("-0.000558313") + std = float("0.020858") + data = None + + +class Program_weight_tensor_parameter_367: + name = "parameter_367" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.076988") + max_val = float("0.0773655") + mean = float("-0.000799213") + std = float("0.0194891") + data = None + + +class Program_weight_tensor_parameter_368: + name = "parameter_368" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.052951") + max_val = float("0.0621607") + mean = float("0.0011092") + std = float("0.0196794") + data = None + + +class Program_weight_tensor_parameter_369: + name = "parameter_369" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0949259") + max_val = float("0.0924379") + mean = float("-1.43068e-05") + std = float("0.019978") + data = None + + +class Program_weight_tensor_parameter_370: + name = "parameter_370" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.098106") + max_val = float("0.0937175") + mean = float("4.33239e-05") + std = float("0.0200302") + data = None + + +class Program_weight_tensor_parameter_371: + name = "parameter_371" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950287") + max_val = float("0.0926024") + mean = float("5.24463e-06") + std = float("0.0200123") + data = None + + +class Program_weight_tensor_parameter_372: + name = "parameter_372" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0971058") + max_val = float("0.0922806") + mean = float("-4.05307e-05") + std = float("0.0199756") + data = None + + +class Program_weight_tensor_parameter_373: + name = "parameter_373" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.104123") + max_val = float("0.0927598") + mean = float("-2.64591e-05") + std = float("0.020012") + data = None + + +class Program_weight_tensor_parameter_374: + name = "parameter_374" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_375: + name = "parameter_375" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0933842") + max_val = float("0.101948") + mean = float("-6.79444e-06") + std = float("0.0199902") + data = None + + +class Program_weight_tensor_parameter_376: + name = "parameter_376" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_377: + name = "parameter_377" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0945606") + max_val = float("0.102819") + mean = float("3.25459e-06") + std = float("0.0199865") + data = None + + +class Program_weight_tensor_parameter_378: + name = "parameter_378" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_379: + name = "parameter_379" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_380: + name = "parameter_380" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_381: + name = "parameter_381" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_382: + name = "parameter_382" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0812335") + max_val = float("0.0642184") + mean = float("0.000128249") + std = float("0.020208") + data = None + + +class Program_weight_tensor_parameter_383: + name = "parameter_383" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0620183") + max_val = float("0.0594621") + mean = float("-0.00105505") + std = float("0.020015") + data = None + + +class Program_weight_tensor_parameter_384: + name = "parameter_384" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0561532") + max_val = float("0.069992") + mean = float("0.000415732") + std = float("0.0194367") + data = None + + +class Program_weight_tensor_parameter_385: + name = "parameter_385" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0606509") + max_val = float("0.0585851") + mean = float("-0.000265057") + std = float("0.0193097") + data = None + + +class Program_weight_tensor_parameter_386: + name = "parameter_386" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0893203") + max_val = float("0.0974871") + mean = float("-3.63362e-06") + std = float("0.0200269") + data = None + + +class Program_weight_tensor_parameter_387: + name = "parameter_387" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0941134") + max_val = float("0.0877735") + mean = float("-2.65744e-05") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_388: + name = "parameter_388" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0936635") + max_val = float("0.0988059") + mean = float("-3.38022e-05") + std = float("0.0199935") + data = None + + +class Program_weight_tensor_parameter_389: + name = "parameter_389" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0959509") + max_val = float("0.0968037") + mean = float("-1.03065e-05") + std = float("0.0200405") + data = None + + +class Program_weight_tensor_parameter_390: + name = "parameter_390" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0890728") + max_val = float("0.0956342") + mean = float("1.55674e-05") + std = float("0.0200325") + data = None + + +class Program_weight_tensor_parameter_391: + name = "parameter_391" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_392: + name = "parameter_392" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0943604") + max_val = float("0.111684") + mean = float("1.39828e-05") + std = float("0.0200019") + data = None + + +class Program_weight_tensor_parameter_393: + name = "parameter_393" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_394: + name = "parameter_394" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.100682") + max_val = float("0.0971403") + mean = float("9.96819e-06") + std = float("0.0200029") + data = None + + +class Program_weight_tensor_parameter_395: + name = "parameter_395" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_396: + name = "parameter_396" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_397: + name = "parameter_397" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_398: + name = "parameter_398" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_399: + name = "parameter_399" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0584292") + max_val = float("0.0763427") + mean = float("-0.000338736") + std = float("0.0201055") + data = None + + +class Program_weight_tensor_parameter_400: + name = "parameter_400" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.063222") + max_val = float("0.0555351") + mean = float("-0.000473943") + std = float("0.0213078") + data = None + + +class Program_weight_tensor_parameter_401: + name = "parameter_401" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0613989") + max_val = float("0.0622419") + mean = float("4.58493e-05") + std = float("0.0193804") + data = None + + +class Program_weight_tensor_parameter_402: + name = "parameter_402" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0739278") + max_val = float("0.0657546") + mean = float("0.000618415") + std = float("0.0209295") + data = None + + +class Program_weight_tensor_parameter_403: + name = "parameter_403" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0947384") + max_val = float("0.0932296") + mean = float("1.10171e-05") + std = float("0.0199879") + data = None + + +class Program_weight_tensor_parameter_404: + name = "parameter_404" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.105606") + max_val = float("0.0955081") + mean = float("2.61478e-05") + std = float("0.0199997") + data = None + + +class Program_weight_tensor_parameter_405: + name = "parameter_405" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0955242") + max_val = float("0.0950305") + mean = float("6.06502e-06") + std = float("0.019978") + data = None + + +class Program_weight_tensor_parameter_406: + name = "parameter_406" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0969456") + max_val = float("0.100197") + mean = float("-3.20375e-05") + std = float("0.0199887") + data = None + + +class Program_weight_tensor_parameter_407: + name = "parameter_407" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.106133") + max_val = float("0.0949257") + mean = float("-1.25246e-07") + std = float("0.0199849") + data = None + + +class Program_weight_tensor_parameter_408: + name = "parameter_408" + shape = [32000, 768] + dtype = "float32" + min_val = float("-0.110157") + max_val = float("0.108801") + mean = float("-8.50089e-07") + std = float("0.019998") + data = None + + +class Program_weight_tensor_parameter_409: + name = "parameter_409" + shape = [1, 1, 768] + dtype = "float32" + min_val = float("-0.060647") + max_val = float("0.0587686") + mean = float("-0.000567059") + std = float("0.0195027") + data = None diff --git a/paddle_samples/PaddleNLP/xlnet-base-cased/graph_net.json b/paddle_samples/PaddleNLP/xlnet-base-cased/graph_net.json new file mode 100644 index 000000000..6260580c5 --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-base-cased/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "xlnet-base-cased", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/xlnet-base-cased/input_meta.py b/paddle_samples/PaddleNLP/xlnet-base-cased/input_meta.py new file mode 100644 index 000000000..feae33c5c --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-base-cased/input_meta.py @@ -0,0 +1,42 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 22] + dtype = "int64" + data = [ + 17, + 11368, + 19, + 94, + 304, + 27, + 2656, + 9, + 35, + 569, + 1899, + 75, + 392, + 1243, + 2626, + 21, + 58, + 4797, + 23, + 9, + 4, + 3, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 22] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 22] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] diff --git a/paddle_samples/PaddleNLP/xlnet-base-cased/model.py b/paddle_samples/PaddleNLP/xlnet-base-cased/model.py new file mode 100644 index 000000000..fa9ce555f --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-base-cased/model.py @@ -0,0 +1,4369 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + data_0, + data_1, + data_2, + ): + # pd_op.transpose: (22x1xi64) <- (1x22xi64) + transpose_1 = paddle._C_ops.transpose(data_0, [1, 0]) + del data_0 + + # pd_op.transpose: (22x1xi64) <- (1x22xi64) + transpose_2 = paddle._C_ops.transpose(data_1, [1, 0]) + del data_1 + + # pd_op.transpose: (22x1xi64) <- (1x22xi64) + transpose_3 = paddle._C_ops.transpose(data_2, [1, 0]) + del data_2 + + # pd_op.cast: (22x1xf32) <- (22x1xi64) + cast_0 = paddle._C_ops.cast(transpose_3, paddle.float32) + del transpose_3 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (22x1xf32) <- (22x1xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.unsqueeze: (1x22x1xf32) <- (22x1xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.unsqueeze: (1x22x1x1xf32) <- (1x22x1xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.greater_than: (1x22x1x1xb) <- (1x22x1x1xf32, xf32) + greater_than_0 = paddle._C_ops.greater_than(unsqueeze_1, full_1) + del unsqueeze_1 + + # pd_op.cast: (1x22x1x1xf32) <- (1x22x1x1xb) + cast_1 = paddle._C_ops.cast(greater_than_0, paddle.float32) + del greater_than_0 + + # pd_op.full: (22xf32) <- () + full_2 = paddle._C_ops.full( + [22], float("1"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.diag: (22x22xf32) <- (22xf32) + diag_0 = paddle._C_ops.diag(full_2, 0, float("0")) + del full_2 + + # pd_op.scale: (22x22xf32) <- (22x22xf32, 1xf32) + scale_1 = paddle._C_ops.scale(diag_0, full_0, float("0"), True) + del diag_0, full_0 + + # pd_op.cast: (22x22xf32) <- (22x22xf32) + cast_2 = paddle._C_ops.cast(scale_1, paddle.float32) + del scale_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [2, 3] + + # pd_op.unsqueeze: (22x22x1x1xf32) <- (22x22xf32, 2xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(cast_2, full_int_array_2) + del cast_2, full_int_array_2 + + # pd_op.add: (22x22x1x1xf32) <- (1x22x1x1xf32, 22x22x1x1xf32) + add_0 = paddle._C_ops.add(cast_1, unsqueeze_2) + del cast_1, unsqueeze_2 + + # pd_op.greater_than: (22x22x1x1xb) <- (22x22x1x1xf32, xf32) + greater_than_1 = paddle._C_ops.greater_than(add_0, full_1) + del add_0, full_1 + + # pd_op.cast: (22x22x1x1xf32) <- (22x22x1x1xb) + cast_3 = paddle._C_ops.cast(greater_than_1, paddle.float32) + del greater_than_1 + + # pd_op.embedding: (22x1x768xf32) <- (22x1xi64, 32000x768xf32) + embedding_0 = paddle._C_ops.embedding(transpose_1, parameter_204, -1, False) + del parameter_204, transpose_1 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [1] + + # pd_op.unsqueeze: (22x1x1xi64) <- (22x1xi64, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_3) + + # pd_op.unsqueeze: (1x22x1xi64) <- (22x1xi64, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_0) + del full_int_array_0, transpose_2 + + # pd_op.not_equal: (22x22x1xb) <- (22x1x1xi64, 1x22x1xi64) + not_equal_0 = paddle._C_ops.not_equal(unsqueeze_3, unsqueeze_4) + del unsqueeze_3, unsqueeze_4 + + # pd_op.cast: (22x22x1xi64) <- (22x22x1xb) + cast_4 = paddle._C_ops.cast(not_equal_0, paddle.int64) + del not_equal_0 + + # pd_op.full: (1xi32) <- () + full_4 = paddle._C_ops.full( + [1], float("2"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.one_hot: (22x22x1x2xf32) <- (22x22x1xi64, 1xi32) + one_hot_0 = paddle._C_ops.one_hot( + cast_4 % paddle.cast(full_4, cast_4.dtype), full_4 + ) + del cast_4, full_4 + + # pd_op.cast: (22x22x1x2xf32) <- (22x22x1x2xf32) + cast_5 = paddle._C_ops.cast(one_hot_0, paddle.float32) + del one_hot_0 + + # pd_op.full: (1xf64) <- () + full_5 = paddle._C_ops.full( + [1], float("0"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_6 = paddle._C_ops.full( + [1], float("768"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_7 = paddle._C_ops.full( + [1], float("2"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (384xf32) <- (1xf64, 1xf64, 1xf64) + arange_0 = paddle.arange(full_5, full_6, full_7, dtype="float32") + del full_6, full_7 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.00130208"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (384xf32) <- (384xf32, 1xf32) + scale_2 = paddle._C_ops.scale(arange_0, full_8, float("0"), True) + del arange_0, full_8 + + # pd_op.full: (384xf32) <- () + full_9 = paddle._C_ops.full( + [384], + float("10000"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.elementwise_pow: (384xf32) <- (384xf32, 384xf32) + elementwise_pow_0 = paddle._C_ops.elementwise_pow(full_9, scale_2) + del full_9, scale_2 + + # pd_op.full: (384xf32) <- () + full_10 = paddle._C_ops.full( + [384], + float("1"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.divide: (384xf32) <- (384xf32, 384xf32) + divide_0 = paddle._C_ops.divide(full_10, elementwise_pow_0) + del elementwise_pow_0, full_10 + + # pd_op.full: (1xf64) <- () + full_11 = paddle._C_ops.full( + [1], float("22"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_12 = paddle._C_ops.full( + [1], float("-22"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_13 = paddle._C_ops.full( + [1], float("-1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (44xf32) <- (1xf64, 1xf64, 1xf64) + arange_1 = paddle.arange(full_11, full_12, full_13, dtype="float32") + del full_12, full_13 + + # builtin.combine: ([44xf32, 384xf32]) <- (44xf32, 384xf32) + combine_0 = [arange_1, divide_0] + del arange_1, divide_0 + + # pd_op.einsum: (44x384xf32, [0xf32, 0xf32], [44xf32, 384xf32]) <- ([44xf32, 384xf32]) + einsum_0, einsum_1, einsum_2 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_0, "i,d->id"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_0 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_0, + split_1, + ) = einsum_1 + del einsum_1 + + # builtin.split: (44xf32, 384xf32) <- ([44xf32, 384xf32]) + ( + split_2, + split_3, + ) = einsum_2 + del einsum_2 + + # pd_op.sin: (44x384xf32) <- (44x384xf32) + sin_0 = paddle._C_ops.sin(einsum_0) + + # pd_op.cos: (44x384xf32) <- (44x384xf32) + cos_0 = paddle._C_ops.cos(einsum_0) + del einsum_0 + + # pd_op.full: (1xi32) <- () + full_14 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([44x384xf32, 44x384xf32]) <- (44x384xf32, 44x384xf32) + combine_1 = [sin_0, cos_0] + del cos_0, sin_0 + + # pd_op.concat: (44x768xf32) <- ([44x384xf32, 44x384xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_1, full_14) + del combine_1, full_14 + + # pd_op.unsqueeze: (44x1x768xf32) <- (44x768xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(concat_0, full_int_array_3) + del concat_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 1, -1] + + # pd_op.expand: (44x1x768xf32) <- (44x1x768xf32, 3xi64) + expand_0 = paddle._C_ops.expand(unsqueeze_5, full_int_array_4) + del full_int_array_4, unsqueeze_5 + + # pd_op.dropout: (44x1x768xf32, 44x1x768xui8) <- (44x1x768xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + expand_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del expand_0 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_203, False, False) + del parameter_203 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_5 = [22, 1, 12, 64] + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_0, full_int_array_5) + del matmul_0 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_202, False, False) + del parameter_202 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_1, full_int_array_5) + del matmul_1 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_201, False, False) + del parameter_201 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_2, full_int_array_5) + del matmul_2 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(dropout_2, parameter_199, False, False) + del parameter_199 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_6 = [44, -1, 12, 64] + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(matmul_3, full_int_array_6) + del matmul_3 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_1 = paddle._C_ops.add(reshape_0, parameter_196) + del parameter_196 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_2 = [add_1, reshape_1] + del add_1, reshape_1 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_3, einsum_4, einsum_5 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_2, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_2 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_4, + split_5, + ) = einsum_4 + del einsum_4 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_6, + split_7, + ) = einsum_5 + del einsum_5 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_2 = paddle._C_ops.add(reshape_0, parameter_198) + del parameter_198 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_3 = [add_2, reshape_3] + del add_2, reshape_3 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_6, einsum_7, einsum_8 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_3, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_3 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_8, + split_9, + ) = einsum_7 + del einsum_7 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_10, + split_11, + ) = einsum_8 + del einsum_8 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_7 = [1, 12, 44, 22] + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(einsum_6, full_int_array_7) + del einsum_6 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [2147483647] + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + reshape_4, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_4 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, 12, 22, 43] + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(slice_0, full_int_array_9) + del slice_0 + + # pd_op.full: (1xf64) <- () + full_15 = paddle._C_ops.full( + [1], float("1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (22xi64) <- (1xf64, 1xf64, 1xf64) + arange_2 = paddle.arange(full_5, full_11, full_15, dtype="int64") + del full_11, full_15, full_5 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_0 = paddle._C_ops.index_select(reshape_5, arange_2, 3) + del reshape_5 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_3 = paddle._C_ops.add(reshape_0, parameter_197) + del parameter_197, reshape_0 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_4 = [add_3, parameter_195] + del add_3, parameter_195 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_9, einsum_10, einsum_11 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_4, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_4 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_12, + split_13, + ) = einsum_10 + del einsum_10 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_14, + split_15, + ) = einsum_11 + del einsum_11 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_5 = [cast_5, einsum_9] + del einsum_9 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_12, einsum_13, einsum_14 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_5, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_5 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_16, + split_17, + ) = einsum_13 + del einsum_13 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_18, + split_19, + ) = einsum_14 + del einsum_14 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_4 = paddle._C_ops.add(einsum_3, index_select_0) + del einsum_3, index_select_0 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_5 = paddle._C_ops.add(add_4, einsum_12) + del add_4, einsum_12 + + # pd_op.full: (1xf32) <- () + full_16 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_3 = paddle._C_ops.scale(add_5, full_16, float("0"), True) + del add_5 + + # pd_op.transpose: (1x1x22x22xf32) <- (22x22x1x1xf32) + transpose_4 = paddle._C_ops.transpose(cast_3, [2, 3, 0, 1]) + del cast_3 + + # pd_op.full: (1xf32) <- () + full_17 = paddle._C_ops.full( + [1], float("1e+30"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x22x22xf32) <- (1x1x22x22xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_4, full_17, float("0"), True) + del full_17, transpose_4 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_0 = paddle._C_ops.subtract(scale_3, scale_4) + del scale_3 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_0 = paddle._C_ops.softmax(subtract_0, 3) + del subtract_0 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_6 = [dropout_4, reshape_2] + del dropout_4, reshape_2 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_15, einsum_16, einsum_17 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_6, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_6 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_20, + split_21, + ) = einsum_16 + del einsum_16 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_22, + split_23, + ) = einsum_17 + del einsum_17 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_10 = [22, 1, 768] + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_6 = paddle._C_ops.reshape(einsum_15, full_int_array_10) + del einsum_15 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_7 = [reshape_6, parameter_200] + del parameter_200, reshape_6 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_18, einsum_19, einsum_20 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_7, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_7 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_24, + split_25, + ) = einsum_19 + del einsum_19 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_26, + split_27, + ) = einsum_20 + del einsum_20 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_18 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_6 = paddle._C_ops.add(dropout_6, dropout_0) + del dropout_0, dropout_6 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_194, parameter_193, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_6, parameter_193, parameter_194 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_0, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_7 = paddle._C_ops.add(matmul_4, parameter_189) + del matmul_4, parameter_189 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_0 = paddle._C_ops.gelu(add_7, False) + del add_7 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_0 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_5 = paddle._C_ops.matmul(dropout_8, parameter_188, False, False) + del dropout_8, parameter_188 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_8 = paddle._C_ops.add(matmul_5, parameter_187) + del matmul_5, parameter_187 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_8 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_9 = paddle._C_ops.add(dropout_10, layer_norm_0) + del dropout_10, layer_norm_0 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_9, parameter_192, parameter_191, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_9, parameter_191, parameter_192 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_3, parameter_186, False, False) + del parameter_186 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(matmul_6, full_int_array_5) + del matmul_6 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_185, False, False) + del parameter_185 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_7, full_int_array_5) + del matmul_7 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_3, parameter_184, False, False) + del parameter_184 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_8, full_int_array_5) + del matmul_8 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(dropout_2, parameter_182, False, False) + del parameter_182 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_9, full_int_array_6) + del matmul_9 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_10 = paddle._C_ops.add(reshape_7, parameter_179) + del parameter_179 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_8 = [add_10, reshape_8] + del add_10, reshape_8 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_21, einsum_22, einsum_23 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_8, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_8 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_28, + split_29, + ) = einsum_22 + del einsum_22 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_30, + split_31, + ) = einsum_23 + del einsum_23 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_11 = paddle._C_ops.add(reshape_7, parameter_181) + del parameter_181 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_9 = [add_11, reshape_10] + del add_11, reshape_10 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_24, einsum_25, einsum_26 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_9, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_9 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_32, + split_33, + ) = einsum_25 + del einsum_25 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_34, + split_35, + ) = einsum_26 + del einsum_26 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(einsum_24, full_int_array_7) + del einsum_24 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + reshape_11, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_11 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(slice_1, full_int_array_9) + del slice_1 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_1 = paddle._C_ops.index_select(reshape_12, arange_2, 3) + del reshape_12 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_12 = paddle._C_ops.add(reshape_7, parameter_180) + del parameter_180, reshape_7 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_10 = [add_12, parameter_178] + del add_12, parameter_178 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_27, einsum_28, einsum_29 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_10, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_10 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_36, + split_37, + ) = einsum_28 + del einsum_28 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_38, + split_39, + ) = einsum_29 + del einsum_29 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_11 = [cast_5, einsum_27] + del einsum_27 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_30, einsum_31, einsum_32 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_11, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_11 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_40, + split_41, + ) = einsum_31 + del einsum_31 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_42, + split_43, + ) = einsum_32 + del einsum_32 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_13 = paddle._C_ops.add(einsum_21, index_select_1) + del einsum_21, index_select_1 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_14 = paddle._C_ops.add(add_13, einsum_30) + del add_13, einsum_30 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_5 = paddle._C_ops.scale(add_14, full_16, float("0"), True) + del add_14 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_1 = paddle._C_ops.subtract(scale_5, scale_4) + del scale_5 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_1 = paddle._C_ops.softmax(subtract_1, 3) + del subtract_1 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_12 = [dropout_12, reshape_9] + del dropout_12, reshape_9 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_33, einsum_34, einsum_35 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_12, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_12 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_44, + split_45, + ) = einsum_34 + del einsum_34 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_46, + split_47, + ) = einsum_35 + del einsum_35 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_13 = paddle._C_ops.reshape(einsum_33, full_int_array_10) + del einsum_33 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_13 = [reshape_13, parameter_183] + del parameter_183, reshape_13 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_36, einsum_37, einsum_38 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_13, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_13 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_48, + split_49, + ) = einsum_37 + del einsum_37 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_50, + split_51, + ) = einsum_38 + del einsum_38 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_36, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_36 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_15 = paddle._C_ops.add(dropout_14, layer_norm_3) + del dropout_14, layer_norm_3 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_15, parameter_177, parameter_176, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_15, parameter_176, parameter_177 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_16 = paddle._C_ops.add(matmul_10, parameter_172) + del matmul_10, parameter_172 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_1 = paddle._C_ops.gelu(add_16, False) + del add_16 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_1 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_11 = paddle._C_ops.matmul(dropout_16, parameter_171, False, False) + del dropout_16, parameter_171 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_11, parameter_170) + del matmul_11, parameter_170 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_17 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_18 = paddle._C_ops.add(dropout_18, layer_norm_6) + del dropout_18, layer_norm_6 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_175, parameter_174, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18, parameter_174, parameter_175 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_9, parameter_169, False, False) + del parameter_169 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_12, full_int_array_5) + del matmul_12 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_13 = paddle._C_ops.matmul(layer_norm_9, parameter_168, False, False) + del parameter_168 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(matmul_13, full_int_array_5) + del matmul_13 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_9, parameter_167, False, False) + del parameter_167 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_14, full_int_array_5) + del matmul_14 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_15 = paddle._C_ops.matmul(dropout_2, parameter_165, False, False) + del parameter_165 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_15, full_int_array_6) + del matmul_15 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_19 = paddle._C_ops.add(reshape_14, parameter_162) + del parameter_162 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_14 = [add_19, reshape_15] + del add_19, reshape_15 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_39, einsum_40, einsum_41 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_14, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_14 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_52, + split_53, + ) = einsum_40 + del einsum_40 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_54, + split_55, + ) = einsum_41 + del einsum_41 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_20 = paddle._C_ops.add(reshape_14, parameter_164) + del parameter_164 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_15 = [add_20, reshape_17] + del add_20, reshape_17 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_42, einsum_43, einsum_44 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_15, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_15 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_56, + split_57, + ) = einsum_43 + del einsum_43 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_58, + split_59, + ) = einsum_44 + del einsum_44 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(einsum_42, full_int_array_7) + del einsum_42 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + reshape_18, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_18 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_19 = paddle._C_ops.reshape(slice_2, full_int_array_9) + del slice_2 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_2 = paddle._C_ops.index_select(reshape_19, arange_2, 3) + del reshape_19 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_21 = paddle._C_ops.add(reshape_14, parameter_163) + del parameter_163, reshape_14 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_16 = [add_21, parameter_161] + del add_21, parameter_161 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_45, einsum_46, einsum_47 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_16, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_16 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_60, + split_61, + ) = einsum_46 + del einsum_46 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_62, + split_63, + ) = einsum_47 + del einsum_47 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_17 = [cast_5, einsum_45] + del einsum_45 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_48, einsum_49, einsum_50 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_17, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_17 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_64, + split_65, + ) = einsum_49 + del einsum_49 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_66, + split_67, + ) = einsum_50 + del einsum_50 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_22 = paddle._C_ops.add(einsum_39, index_select_2) + del einsum_39, index_select_2 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_23 = paddle._C_ops.add(add_22, einsum_48) + del add_22, einsum_48 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_6 = paddle._C_ops.scale(add_23, full_16, float("0"), True) + del add_23 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_2 = paddle._C_ops.subtract(scale_6, scale_4) + del scale_6 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_2 = paddle._C_ops.softmax(subtract_2, 3) + del subtract_2 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_18 = [dropout_20, reshape_16] + del dropout_20, reshape_16 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_51, einsum_52, einsum_53 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_18, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_18 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_68, + split_69, + ) = einsum_52 + del einsum_52 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_70, + split_71, + ) = einsum_53 + del einsum_53 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_20 = paddle._C_ops.reshape(einsum_51, full_int_array_10) + del einsum_51 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_19 = [reshape_20, parameter_166] + del parameter_166, reshape_20 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_54, einsum_55, einsum_56 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_19, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_19 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_72, + split_73, + ) = einsum_55 + del einsum_55 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_74, + split_75, + ) = einsum_56 + del einsum_56 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_54, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_54 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_24 = paddle._C_ops.add(dropout_22, layer_norm_9) + del dropout_22, layer_norm_9 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_24, parameter_160, parameter_159, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_24, parameter_159, parameter_160 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_12, parameter_156, False, False) + del parameter_156 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_25 = paddle._C_ops.add(matmul_16, parameter_155) + del matmul_16, parameter_155 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_2 = paddle._C_ops.gelu(add_25, False) + del add_25 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_2 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_17 = paddle._C_ops.matmul(dropout_24, parameter_154, False, False) + del dropout_24, parameter_154 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_17, parameter_153) + del matmul_17, parameter_153 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_26, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_26 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_27 = paddle._C_ops.add(dropout_26, layer_norm_12) + del dropout_26, layer_norm_12 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_27, parameter_158, parameter_157, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_27, parameter_157, parameter_158 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_15, parameter_152, False, False) + del parameter_152 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_18, full_int_array_5) + del matmul_18 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_15, parameter_151, False, False) + del parameter_151 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_19, full_int_array_5) + del matmul_19 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_20 = paddle._C_ops.matmul(layer_norm_15, parameter_150, False, False) + del parameter_150 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_23 = paddle._C_ops.reshape(matmul_20, full_int_array_5) + del matmul_20 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_21 = paddle._C_ops.matmul(dropout_2, parameter_148, False, False) + del parameter_148 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_21, full_int_array_6) + del matmul_21 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_28 = paddle._C_ops.add(reshape_21, parameter_145) + del parameter_145 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_20 = [add_28, reshape_22] + del add_28, reshape_22 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_57, einsum_58, einsum_59 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_20, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_20 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_76, + split_77, + ) = einsum_58 + del einsum_58 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_78, + split_79, + ) = einsum_59 + del einsum_59 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_29 = paddle._C_ops.add(reshape_21, parameter_147) + del parameter_147 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_21 = [add_29, reshape_24] + del add_29, reshape_24 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_60, einsum_61, einsum_62 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_21, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_21 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_80, + split_81, + ) = einsum_61 + del einsum_61 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_82, + split_83, + ) = einsum_62 + del einsum_62 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(einsum_60, full_int_array_7) + del einsum_60 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + reshape_25, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_25 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(slice_3, full_int_array_9) + del slice_3 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_3 = paddle._C_ops.index_select(reshape_26, arange_2, 3) + del reshape_26 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_30 = paddle._C_ops.add(reshape_21, parameter_146) + del parameter_146, reshape_21 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_22 = [add_30, parameter_144] + del add_30, parameter_144 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_63, einsum_64, einsum_65 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_22, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_22 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_84, + split_85, + ) = einsum_64 + del einsum_64 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_86, + split_87, + ) = einsum_65 + del einsum_65 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_23 = [cast_5, einsum_63] + del einsum_63 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_66, einsum_67, einsum_68 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_23, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_23 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_88, + split_89, + ) = einsum_67 + del einsum_67 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_90, + split_91, + ) = einsum_68 + del einsum_68 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_31 = paddle._C_ops.add(einsum_57, index_select_3) + del einsum_57, index_select_3 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_32 = paddle._C_ops.add(add_31, einsum_66) + del add_31, einsum_66 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_7 = paddle._C_ops.scale(add_32, full_16, float("0"), True) + del add_32 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_3 = paddle._C_ops.subtract(scale_7, scale_4) + del scale_7 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_3 = paddle._C_ops.softmax(subtract_3, 3) + del subtract_3 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_24 = [dropout_28, reshape_23] + del dropout_28, reshape_23 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_69, einsum_70, einsum_71 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_24, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_24 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_92, + split_93, + ) = einsum_70 + del einsum_70 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_94, + split_95, + ) = einsum_71 + del einsum_71 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(einsum_69, full_int_array_10) + del einsum_69 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_25 = [reshape_27, parameter_149] + del parameter_149, reshape_27 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_72, einsum_73, einsum_74 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_25, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_25 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_96, + split_97, + ) = einsum_73 + del einsum_73 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_98, + split_99, + ) = einsum_74 + del einsum_74 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_72, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_72 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_33 = paddle._C_ops.add(dropout_30, layer_norm_15) + del dropout_30, layer_norm_15 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_33, parameter_143, parameter_142, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_33, parameter_142, parameter_143 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_18, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_34 = paddle._C_ops.add(matmul_22, parameter_138) + del matmul_22, parameter_138 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_3 = paddle._C_ops.gelu(add_34, False) + del add_34 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_3 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_23 = paddle._C_ops.matmul(dropout_32, parameter_137, False, False) + del dropout_32, parameter_137 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_35 = paddle._C_ops.add(matmul_23, parameter_136) + del matmul_23, parameter_136 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_35, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_35 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_36 = paddle._C_ops.add(dropout_34, layer_norm_18) + del dropout_34, layer_norm_18 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_36, parameter_141, parameter_140, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_36, parameter_140, parameter_141 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_21, parameter_135, False, False) + del parameter_135 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_24, full_int_array_5) + del matmul_24 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_21, parameter_134, False, False) + del parameter_134 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_25, full_int_array_5) + del matmul_25 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_21, parameter_133, False, False) + del parameter_133 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_26, full_int_array_5) + del matmul_26 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(dropout_2, parameter_131, False, False) + del parameter_131 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(matmul_27, full_int_array_6) + del matmul_27 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_37 = paddle._C_ops.add(reshape_28, parameter_128) + del parameter_128 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_26 = [add_37, reshape_29] + del add_37, reshape_29 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_75, einsum_76, einsum_77 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_26, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_26 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_100, + split_101, + ) = einsum_76 + del einsum_76 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_102, + split_103, + ) = einsum_77 + del einsum_77 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_38 = paddle._C_ops.add(reshape_28, parameter_130) + del parameter_130 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_27 = [add_38, reshape_31] + del add_38, reshape_31 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_78, einsum_79, einsum_80 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_27, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_27 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_104, + split_105, + ) = einsum_79 + del einsum_79 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_106, + split_107, + ) = einsum_80 + del einsum_80 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(einsum_78, full_int_array_7) + del einsum_78 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + reshape_32, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_32 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(slice_4, full_int_array_9) + del slice_4 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_4 = paddle._C_ops.index_select(reshape_33, arange_2, 3) + del reshape_33 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_39 = paddle._C_ops.add(reshape_28, parameter_129) + del parameter_129, reshape_28 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_28 = [add_39, parameter_127] + del add_39, parameter_127 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_81, einsum_82, einsum_83 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_28, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_28 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_108, + split_109, + ) = einsum_82 + del einsum_82 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_110, + split_111, + ) = einsum_83 + del einsum_83 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_29 = [cast_5, einsum_81] + del einsum_81 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_84, einsum_85, einsum_86 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_29, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_29 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_112, + split_113, + ) = einsum_85 + del einsum_85 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_114, + split_115, + ) = einsum_86 + del einsum_86 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_40 = paddle._C_ops.add(einsum_75, index_select_4) + del einsum_75, index_select_4 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_41 = paddle._C_ops.add(add_40, einsum_84) + del add_40, einsum_84 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_8 = paddle._C_ops.scale(add_41, full_16, float("0"), True) + del add_41 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_4 = paddle._C_ops.subtract(scale_8, scale_4) + del scale_8 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_4 = paddle._C_ops.softmax(subtract_4, 3) + del subtract_4 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_30 = [dropout_36, reshape_30] + del dropout_36, reshape_30 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_87, einsum_88, einsum_89 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_30, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_30 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_116, + split_117, + ) = einsum_88 + del einsum_88 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_118, + split_119, + ) = einsum_89 + del einsum_89 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_34 = paddle._C_ops.reshape(einsum_87, full_int_array_10) + del einsum_87 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_31 = [reshape_34, parameter_132] + del parameter_132, reshape_34 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_90, einsum_91, einsum_92 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_31, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_31 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_120, + split_121, + ) = einsum_91 + del einsum_91 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_122, + split_123, + ) = einsum_92 + del einsum_92 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_90, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_90 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_42 = paddle._C_ops.add(dropout_38, layer_norm_21) + del dropout_38, layer_norm_21 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_126, parameter_125, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_42, parameter_125, parameter_126 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_28 = paddle._C_ops.matmul(layer_norm_24, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_43 = paddle._C_ops.add(matmul_28, parameter_121) + del matmul_28, parameter_121 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_43, False) + del add_43 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_4 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_29 = paddle._C_ops.matmul(dropout_40, parameter_120, False, False) + del dropout_40, parameter_120 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_29, parameter_119) + del matmul_29, parameter_119 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_44, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_44 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_45 = paddle._C_ops.add(dropout_42, layer_norm_24) + del dropout_42, layer_norm_24 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_124, parameter_123, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_45, parameter_123, parameter_124 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_27, parameter_118, False, False) + del parameter_118 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(matmul_30, full_int_array_5) + del matmul_30 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_27, parameter_117, False, False) + del parameter_117 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_31, full_int_array_5) + del matmul_31 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_27, parameter_116, False, False) + del parameter_116 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_32, full_int_array_5) + del matmul_32 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(dropout_2, parameter_114, False, False) + del parameter_114 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_33, full_int_array_6) + del matmul_33 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_46 = paddle._C_ops.add(reshape_35, parameter_111) + del parameter_111 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_32 = [add_46, reshape_36] + del add_46, reshape_36 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_93, einsum_94, einsum_95 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_32, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_32 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_124, + split_125, + ) = einsum_94 + del einsum_94 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_126, + split_127, + ) = einsum_95 + del einsum_95 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_47 = paddle._C_ops.add(reshape_35, parameter_113) + del parameter_113 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_33 = [add_47, reshape_38] + del add_47, reshape_38 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_96, einsum_97, einsum_98 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_33, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_33 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_128, + split_129, + ) = einsum_97 + del einsum_97 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_130, + split_131, + ) = einsum_98 + del einsum_98 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(einsum_96, full_int_array_7) + del einsum_96 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + reshape_39, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_39 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(slice_5, full_int_array_9) + del slice_5 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_5 = paddle._C_ops.index_select(reshape_40, arange_2, 3) + del reshape_40 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_48 = paddle._C_ops.add(reshape_35, parameter_112) + del parameter_112, reshape_35 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_34 = [add_48, parameter_110] + del add_48, parameter_110 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_99, einsum_100, einsum_101 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_34, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_34 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_132, + split_133, + ) = einsum_100 + del einsum_100 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_134, + split_135, + ) = einsum_101 + del einsum_101 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_35 = [cast_5, einsum_99] + del einsum_99 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_102, einsum_103, einsum_104 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_35, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_35 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_136, + split_137, + ) = einsum_103 + del einsum_103 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_138, + split_139, + ) = einsum_104 + del einsum_104 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_49 = paddle._C_ops.add(einsum_93, index_select_5) + del einsum_93, index_select_5 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_50 = paddle._C_ops.add(add_49, einsum_102) + del add_49, einsum_102 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_9 = paddle._C_ops.scale(add_50, full_16, float("0"), True) + del add_50 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_5 = paddle._C_ops.subtract(scale_9, scale_4) + del scale_9 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_5 = paddle._C_ops.softmax(subtract_5, 3) + del subtract_5 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_36 = [dropout_44, reshape_37] + del dropout_44, reshape_37 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_105, einsum_106, einsum_107 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_36, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_36 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_140, + split_141, + ) = einsum_106 + del einsum_106 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_142, + split_143, + ) = einsum_107 + del einsum_107 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(einsum_105, full_int_array_10) + del einsum_105 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_37 = [reshape_41, parameter_115] + del parameter_115, reshape_41 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_108, einsum_109, einsum_110 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_37, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_37 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_144, + split_145, + ) = einsum_109 + del einsum_109 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_146, + split_147, + ) = einsum_110 + del einsum_110 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_108, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_108 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_51 = paddle._C_ops.add(dropout_46, layer_norm_27) + del dropout_46, layer_norm_27 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_51, parameter_109, parameter_108, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_51, parameter_108, parameter_109 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_30, parameter_105, False, False) + del parameter_105 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_52 = paddle._C_ops.add(matmul_34, parameter_104) + del matmul_34, parameter_104 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_52, False) + del add_52 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_5 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_35 = paddle._C_ops.matmul(dropout_48, parameter_103, False, False) + del dropout_48, parameter_103 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_53 = paddle._C_ops.add(matmul_35, parameter_102) + del matmul_35, parameter_102 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_53, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_53 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_54 = paddle._C_ops.add(dropout_50, layer_norm_30) + del dropout_50, layer_norm_30 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_54, parameter_107, parameter_106, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_54, parameter_106, parameter_107 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_33, parameter_101, False, False) + del parameter_101 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_36, full_int_array_5) + del matmul_36 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_37 = paddle._C_ops.matmul(layer_norm_33, parameter_100, False, False) + del parameter_100 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(matmul_37, full_int_array_5) + del matmul_37 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_33, parameter_99, False, False) + del parameter_99 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_38, full_int_array_5) + del matmul_38 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_39 = paddle._C_ops.matmul(dropout_2, parameter_97, False, False) + del parameter_97 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_39, full_int_array_6) + del matmul_39 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_55 = paddle._C_ops.add(reshape_42, parameter_94) + del parameter_94 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_38 = [add_55, reshape_43] + del add_55, reshape_43 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_111, einsum_112, einsum_113 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_38, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_38 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_148, + split_149, + ) = einsum_112 + del einsum_112 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_150, + split_151, + ) = einsum_113 + del einsum_113 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_56 = paddle._C_ops.add(reshape_42, parameter_96) + del parameter_96 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_39 = [add_56, reshape_45] + del add_56, reshape_45 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_114, einsum_115, einsum_116 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_39, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_39 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_152, + split_153, + ) = einsum_115 + del einsum_115 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_154, + split_155, + ) = einsum_116 + del einsum_116 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(einsum_114, full_int_array_7) + del einsum_114 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + reshape_46, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_46 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(slice_6, full_int_array_9) + del slice_6 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_6 = paddle._C_ops.index_select(reshape_47, arange_2, 3) + del reshape_47 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_57 = paddle._C_ops.add(reshape_42, parameter_95) + del parameter_95, reshape_42 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_40 = [add_57, parameter_93] + del add_57, parameter_93 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_117, einsum_118, einsum_119 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_40, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_40 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_156, + split_157, + ) = einsum_118 + del einsum_118 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_158, + split_159, + ) = einsum_119 + del einsum_119 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_41 = [cast_5, einsum_117] + del einsum_117 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_120, einsum_121, einsum_122 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_41, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_41 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_160, + split_161, + ) = einsum_121 + del einsum_121 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_162, + split_163, + ) = einsum_122 + del einsum_122 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_58 = paddle._C_ops.add(einsum_111, index_select_6) + del einsum_111, index_select_6 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_59 = paddle._C_ops.add(add_58, einsum_120) + del add_58, einsum_120 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_10 = paddle._C_ops.scale(add_59, full_16, float("0"), True) + del add_59 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_6 = paddle._C_ops.subtract(scale_10, scale_4) + del scale_10 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_6 = paddle._C_ops.softmax(subtract_6, 3) + del subtract_6 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_42 = [dropout_52, reshape_44] + del dropout_52, reshape_44 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_123, einsum_124, einsum_125 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_42, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_42 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_164, + split_165, + ) = einsum_124 + del einsum_124 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_166, + split_167, + ) = einsum_125 + del einsum_125 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(einsum_123, full_int_array_10) + del einsum_123 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_43 = [reshape_48, parameter_98] + del parameter_98, reshape_48 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_126, einsum_127, einsum_128 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_43, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_43 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_168, + split_169, + ) = einsum_127 + del einsum_127 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_170, + split_171, + ) = einsum_128 + del einsum_128 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_126, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_126 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_60 = paddle._C_ops.add(dropout_54, layer_norm_33) + del dropout_54, layer_norm_33 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_60, parameter_92, parameter_91, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_60, parameter_91, parameter_92 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_36, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_61 = paddle._C_ops.add(matmul_40, parameter_87) + del matmul_40, parameter_87 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_61, False) + del add_61 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_6 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_41 = paddle._C_ops.matmul(dropout_56, parameter_86, False, False) + del dropout_56, parameter_86 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_62 = paddle._C_ops.add(matmul_41, parameter_85) + del matmul_41, parameter_85 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_62, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_62 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_63 = paddle._C_ops.add(dropout_58, layer_norm_36) + del dropout_58, layer_norm_36 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_63, parameter_90, parameter_89, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_63, parameter_89, parameter_90 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_39, parameter_84, False, False) + del parameter_84 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(matmul_42, full_int_array_5) + del matmul_42 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_39, parameter_83, False, False) + del parameter_83 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(matmul_43, full_int_array_5) + del matmul_43 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_44 = paddle._C_ops.matmul(layer_norm_39, parameter_82, False, False) + del parameter_82 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_51 = paddle._C_ops.reshape(matmul_44, full_int_array_5) + del matmul_44 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_45 = paddle._C_ops.matmul(dropout_2, parameter_80, False, False) + del parameter_80 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(matmul_45, full_int_array_6) + del matmul_45 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_64 = paddle._C_ops.add(reshape_49, parameter_77) + del parameter_77 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_44 = [add_64, reshape_50] + del add_64, reshape_50 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_129, einsum_130, einsum_131 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_44, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_44 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_172, + split_173, + ) = einsum_130 + del einsum_130 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_174, + split_175, + ) = einsum_131 + del einsum_131 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_65 = paddle._C_ops.add(reshape_49, parameter_79) + del parameter_79 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_45 = [add_65, reshape_52] + del add_65, reshape_52 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_132, einsum_133, einsum_134 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_45, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_45 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_176, + split_177, + ) = einsum_133 + del einsum_133 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_178, + split_179, + ) = einsum_134 + del einsum_134 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(einsum_132, full_int_array_7) + del einsum_132 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + reshape_53, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_53 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(slice_7, full_int_array_9) + del slice_7 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_7 = paddle._C_ops.index_select(reshape_54, arange_2, 3) + del reshape_54 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_66 = paddle._C_ops.add(reshape_49, parameter_78) + del parameter_78, reshape_49 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_46 = [add_66, parameter_76] + del add_66, parameter_76 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_135, einsum_136, einsum_137 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_46, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_46 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_180, + split_181, + ) = einsum_136 + del einsum_136 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_182, + split_183, + ) = einsum_137 + del einsum_137 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_47 = [cast_5, einsum_135] + del einsum_135 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_138, einsum_139, einsum_140 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_47, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_47 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_184, + split_185, + ) = einsum_139 + del einsum_139 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_186, + split_187, + ) = einsum_140 + del einsum_140 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_67 = paddle._C_ops.add(einsum_129, index_select_7) + del einsum_129, index_select_7 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_68 = paddle._C_ops.add(add_67, einsum_138) + del add_67, einsum_138 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_11 = paddle._C_ops.scale(add_68, full_16, float("0"), True) + del add_68 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_7 = paddle._C_ops.subtract(scale_11, scale_4) + del scale_11 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_7 = paddle._C_ops.softmax(subtract_7, 3) + del subtract_7 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_48 = [dropout_60, reshape_51] + del dropout_60, reshape_51 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_141, einsum_142, einsum_143 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_48, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_48 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_188, + split_189, + ) = einsum_142 + del einsum_142 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_190, + split_191, + ) = einsum_143 + del einsum_143 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(einsum_141, full_int_array_10) + del einsum_141 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_49 = [reshape_55, parameter_81] + del parameter_81, reshape_55 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_144, einsum_145, einsum_146 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_49, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_49 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_192, + split_193, + ) = einsum_145 + del einsum_145 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_194, + split_195, + ) = einsum_146 + del einsum_146 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_144, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_144 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_69 = paddle._C_ops.add(dropout_62, layer_norm_39) + del dropout_62, layer_norm_39 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_69, parameter_75, parameter_74, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_69, parameter_74, parameter_75 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_42, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_70 = paddle._C_ops.add(matmul_46, parameter_70) + del matmul_46, parameter_70 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_70, False) + del add_70 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_7 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_47 = paddle._C_ops.matmul(dropout_64, parameter_69, False, False) + del dropout_64, parameter_69 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_71 = paddle._C_ops.add(matmul_47, parameter_68) + del matmul_47, parameter_68 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_71, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_71 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_72 = paddle._C_ops.add(dropout_66, layer_norm_42) + del dropout_66, layer_norm_42 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_72, parameter_73, parameter_72, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_72, parameter_72, parameter_73 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_45, parameter_67, False, False) + del parameter_67 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(matmul_48, full_int_array_5) + del matmul_48 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_45, parameter_66, False, False) + del parameter_66 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(matmul_49, full_int_array_5) + del matmul_49 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_45, parameter_65, False, False) + del parameter_65 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(matmul_50, full_int_array_5) + del matmul_50 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(dropout_2, parameter_63, False, False) + del parameter_63 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_59 = paddle._C_ops.reshape(matmul_51, full_int_array_6) + del matmul_51 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_73 = paddle._C_ops.add(reshape_56, parameter_60) + del parameter_60 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_50 = [add_73, reshape_57] + del add_73, reshape_57 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_147, einsum_148, einsum_149 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_50, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_50 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_196, + split_197, + ) = einsum_148 + del einsum_148 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_198, + split_199, + ) = einsum_149 + del einsum_149 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_74 = paddle._C_ops.add(reshape_56, parameter_62) + del parameter_62 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_51 = [add_74, reshape_59] + del add_74, reshape_59 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_150, einsum_151, einsum_152 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_51, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_51 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_200, + split_201, + ) = einsum_151 + del einsum_151 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_202, + split_203, + ) = einsum_152 + del einsum_152 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(einsum_150, full_int_array_7) + del einsum_150 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + reshape_60, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_60 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(slice_8, full_int_array_9) + del slice_8 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_8 = paddle._C_ops.index_select(reshape_61, arange_2, 3) + del reshape_61 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_75 = paddle._C_ops.add(reshape_56, parameter_61) + del parameter_61, reshape_56 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_52 = [add_75, parameter_59] + del add_75, parameter_59 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_153, einsum_154, einsum_155 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_52, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_52 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_204, + split_205, + ) = einsum_154 + del einsum_154 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_206, + split_207, + ) = einsum_155 + del einsum_155 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_53 = [cast_5, einsum_153] + del einsum_153 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_156, einsum_157, einsum_158 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_53, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_53 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_208, + split_209, + ) = einsum_157 + del einsum_157 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_210, + split_211, + ) = einsum_158 + del einsum_158 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_76 = paddle._C_ops.add(einsum_147, index_select_8) + del einsum_147, index_select_8 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_77 = paddle._C_ops.add(add_76, einsum_156) + del add_76, einsum_156 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_12 = paddle._C_ops.scale(add_77, full_16, float("0"), True) + del add_77 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_8 = paddle._C_ops.subtract(scale_12, scale_4) + del scale_12 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_8 = paddle._C_ops.softmax(subtract_8, 3) + del subtract_8 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_54 = [dropout_68, reshape_58] + del dropout_68, reshape_58 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_159, einsum_160, einsum_161 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_54, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_54 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_212, + split_213, + ) = einsum_160 + del einsum_160 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_214, + split_215, + ) = einsum_161 + del einsum_161 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_62 = paddle._C_ops.reshape(einsum_159, full_int_array_10) + del einsum_159 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_55 = [reshape_62, parameter_64] + del parameter_64, reshape_62 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_162, einsum_163, einsum_164 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_55, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_55 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_216, + split_217, + ) = einsum_163 + del einsum_163 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_218, + split_219, + ) = einsum_164 + del einsum_164 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_162, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_162 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_78 = paddle._C_ops.add(dropout_70, layer_norm_45) + del dropout_70, layer_norm_45 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_78, parameter_58, parameter_57, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_78, parameter_57, parameter_58 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_52 = paddle._C_ops.matmul(layer_norm_48, parameter_54, False, False) + del parameter_54 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_79 = paddle._C_ops.add(matmul_52, parameter_53) + del matmul_52, parameter_53 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_79, False) + del add_79 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_8 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_53 = paddle._C_ops.matmul(dropout_72, parameter_52, False, False) + del dropout_72, parameter_52 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_53, parameter_51) + del matmul_53, parameter_51 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_80, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_80 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_81 = paddle._C_ops.add(dropout_74, layer_norm_48) + del dropout_74, layer_norm_48 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_56, parameter_55, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_81, parameter_55, parameter_56 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_51, parameter_50, False, False) + del parameter_50 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(matmul_54, full_int_array_5) + del matmul_54 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_51, parameter_49, False, False) + del parameter_49 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(matmul_55, full_int_array_5) + del matmul_55 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_51, parameter_48, False, False) + del parameter_48 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(matmul_56, full_int_array_5) + del matmul_56 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(dropout_2, parameter_46, False, False) + del parameter_46 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(matmul_57, full_int_array_6) + del matmul_57 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_82 = paddle._C_ops.add(reshape_63, parameter_43) + del parameter_43 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_56 = [add_82, reshape_64] + del add_82, reshape_64 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_165, einsum_166, einsum_167 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_56, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_56 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_220, + split_221, + ) = einsum_166 + del einsum_166 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_222, + split_223, + ) = einsum_167 + del einsum_167 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_83 = paddle._C_ops.add(reshape_63, parameter_45) + del parameter_45 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_57 = [add_83, reshape_66] + del add_83, reshape_66 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_168, einsum_169, einsum_170 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_57, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_57 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_224, + split_225, + ) = einsum_169 + del einsum_169 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_226, + split_227, + ) = einsum_170 + del einsum_170 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(einsum_168, full_int_array_7) + del einsum_168 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + reshape_67, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_67 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(slice_9, full_int_array_9) + del slice_9 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_9 = paddle._C_ops.index_select(reshape_68, arange_2, 3) + del reshape_68 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_84 = paddle._C_ops.add(reshape_63, parameter_44) + del parameter_44, reshape_63 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_58 = [add_84, parameter_42] + del add_84, parameter_42 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_171, einsum_172, einsum_173 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_58, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_58 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_228, + split_229, + ) = einsum_172 + del einsum_172 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_230, + split_231, + ) = einsum_173 + del einsum_173 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_59 = [cast_5, einsum_171] + del einsum_171 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_174, einsum_175, einsum_176 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_59, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_59 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_232, + split_233, + ) = einsum_175 + del einsum_175 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_234, + split_235, + ) = einsum_176 + del einsum_176 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_85 = paddle._C_ops.add(einsum_165, index_select_9) + del einsum_165, index_select_9 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_86 = paddle._C_ops.add(add_85, einsum_174) + del add_85, einsum_174 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_13 = paddle._C_ops.scale(add_86, full_16, float("0"), True) + del add_86 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_9 = paddle._C_ops.subtract(scale_13, scale_4) + del scale_13 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_9 = paddle._C_ops.softmax(subtract_9, 3) + del subtract_9 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_60 = [dropout_76, reshape_65] + del dropout_76, reshape_65 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_177, einsum_178, einsum_179 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_60, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_60 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_236, + split_237, + ) = einsum_178 + del einsum_178 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_238, + split_239, + ) = einsum_179 + del einsum_179 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(einsum_177, full_int_array_10) + del einsum_177 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_61 = [reshape_69, parameter_47] + del parameter_47, reshape_69 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_180, einsum_181, einsum_182 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_61, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_61 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_240, + split_241, + ) = einsum_181 + del einsum_181 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_242, + split_243, + ) = einsum_182 + del einsum_182 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_180, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_180 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_87 = paddle._C_ops.add(dropout_78, layer_norm_51) + del dropout_78, layer_norm_51 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_41, parameter_40, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_87, parameter_40, parameter_41 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_54, parameter_37, False, False) + del parameter_37 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_88 = paddle._C_ops.add(matmul_58, parameter_36) + del matmul_58, parameter_36 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_88, False) + del add_88 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_9 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_59 = paddle._C_ops.matmul(dropout_80, parameter_35, False, False) + del dropout_80, parameter_35 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_89 = paddle._C_ops.add(matmul_59, parameter_34) + del matmul_59, parameter_34 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_89, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_89 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_90 = paddle._C_ops.add(dropout_82, layer_norm_54) + del dropout_82, layer_norm_54 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_39, parameter_38, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_90, parameter_38, parameter_39 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_57, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(matmul_60, full_int_array_5) + del matmul_60 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_61 = paddle._C_ops.matmul(layer_norm_57, parameter_32, False, False) + del parameter_32 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(matmul_61, full_int_array_5) + del matmul_61 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_57, parameter_31, False, False) + del parameter_31 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_72 = paddle._C_ops.reshape(matmul_62, full_int_array_5) + del matmul_62 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_63 = paddle._C_ops.matmul(dropout_2, parameter_29, False, False) + del parameter_29 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(matmul_63, full_int_array_6) + del matmul_63 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_91 = paddle._C_ops.add(reshape_70, parameter_26) + del parameter_26 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_62 = [add_91, reshape_71] + del add_91, reshape_71 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_183, einsum_184, einsum_185 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_62, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_62 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_244, + split_245, + ) = einsum_184 + del einsum_184 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_246, + split_247, + ) = einsum_185 + del einsum_185 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_92 = paddle._C_ops.add(reshape_70, parameter_28) + del parameter_28 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_63 = [add_92, reshape_73] + del add_92, reshape_73 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_186, einsum_187, einsum_188 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_63, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_63 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_248, + split_249, + ) = einsum_187 + del einsum_187 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_250, + split_251, + ) = einsum_188 + del einsum_188 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(einsum_186, full_int_array_7) + del einsum_186 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + reshape_74, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_74 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(slice_10, full_int_array_9) + del slice_10 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_10 = paddle._C_ops.index_select(reshape_75, arange_2, 3) + del reshape_75 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_93 = paddle._C_ops.add(reshape_70, parameter_27) + del parameter_27, reshape_70 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_64 = [add_93, parameter_25] + del add_93, parameter_25 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_189, einsum_190, einsum_191 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_64, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_64 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_252, + split_253, + ) = einsum_190 + del einsum_190 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_254, + split_255, + ) = einsum_191 + del einsum_191 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_65 = [cast_5, einsum_189] + del einsum_189 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_192, einsum_193, einsum_194 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_65, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_65 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_256, + split_257, + ) = einsum_193 + del einsum_193 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_258, + split_259, + ) = einsum_194 + del einsum_194 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_94 = paddle._C_ops.add(einsum_183, index_select_10) + del einsum_183, index_select_10 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_95 = paddle._C_ops.add(add_94, einsum_192) + del add_94, einsum_192 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_14 = paddle._C_ops.scale(add_95, full_16, float("0"), True) + del add_95 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_10 = paddle._C_ops.subtract(scale_14, scale_4) + del scale_14 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_10 = paddle._C_ops.softmax(subtract_10, 3) + del subtract_10 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_66 = [dropout_84, reshape_72] + del dropout_84, reshape_72 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_195, einsum_196, einsum_197 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_66, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_66 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_260, + split_261, + ) = einsum_196 + del einsum_196 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_262, + split_263, + ) = einsum_197 + del einsum_197 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(einsum_195, full_int_array_10) + del einsum_195 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_67 = [reshape_76, parameter_30] + del parameter_30, reshape_76 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_198, einsum_199, einsum_200 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_67, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_67 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_264, + split_265, + ) = einsum_199 + del einsum_199 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_266, + split_267, + ) = einsum_200 + del einsum_200 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_198, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_198 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_96 = paddle._C_ops.add(dropout_86, layer_norm_57) + del dropout_86, layer_norm_57 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_96, parameter_24, parameter_23, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_96, parameter_23, parameter_24 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_60, parameter_20, False, False) + del parameter_20 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_97 = paddle._C_ops.add(matmul_64, parameter_19) + del matmul_64, parameter_19 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_97, False) + del add_97 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_10 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_65 = paddle._C_ops.matmul(dropout_88, parameter_18, False, False) + del dropout_88, parameter_18 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_98 = paddle._C_ops.add(matmul_65, parameter_17) + del matmul_65, parameter_17 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_98, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_98 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_99 = paddle._C_ops.add(dropout_90, layer_norm_60) + del dropout_90, layer_norm_60 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_99, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_99, parameter_21, parameter_22 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_63, parameter_16, False, False) + del parameter_16 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(matmul_66, full_int_array_5) + del matmul_66 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_63, parameter_15, False, False) + del parameter_15 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(matmul_67, full_int_array_5) + del matmul_67 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x768xf32, 768x768xf32) + matmul_68 = paddle._C_ops.matmul(layer_norm_63, parameter_14, False, False) + del parameter_14 + + # pd_op.reshape: (22x1x12x64xf32) <- (22x1x768xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(matmul_68, full_int_array_5) + del full_int_array_5, matmul_68 + + # pd_op.matmul: (44x1x768xf32) <- (44x1x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(dropout_2, parameter_12, False, False) + del dropout_2, parameter_12 + + # pd_op.reshape: (44x1x12x64xf32) <- (44x1x768xf32, 4xi64) + reshape_80 = paddle._C_ops.reshape(matmul_69, full_int_array_6) + del full_int_array_6, matmul_69 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_100 = paddle._C_ops.add(reshape_77, parameter_9) + del parameter_9 + + # builtin.combine: ([22x1x12x64xf32, 22x1x12x64xf32]) <- (22x1x12x64xf32, 22x1x12x64xf32) + combine_68 = [add_100, reshape_78] + del add_100, reshape_78 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x1x12x64xf32, 22x1x12x64xf32]) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + einsum_201, einsum_202, einsum_203 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_68, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_68 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_268, + split_269, + ) = einsum_202 + del einsum_202 + + # builtin.split: (22x1x12x64xf32, 22x1x12x64xf32) <- ([22x1x12x64xf32, 22x1x12x64xf32]) + ( + split_270, + split_271, + ) = einsum_203 + del einsum_203 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_101 = paddle._C_ops.add(reshape_77, parameter_11) + del parameter_11 + + # builtin.combine: ([22x1x12x64xf32, 44x1x12x64xf32]) <- (22x1x12x64xf32, 44x1x12x64xf32) + combine_69 = [add_101, reshape_80] + del add_101, reshape_80 + + # pd_op.einsum: (1x12x22x44xf32, [0xf32, 0xf32], [22x1x12x64xf32, 44x1x12x64xf32]) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + einsum_204, einsum_205, einsum_206 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_69, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_69 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_272, + split_273, + ) = einsum_205 + del einsum_205 + + # builtin.split: (22x1x12x64xf32, 44x1x12x64xf32) <- ([22x1x12x64xf32, 44x1x12x64xf32]) + ( + split_274, + split_275, + ) = einsum_206 + del einsum_206 + + # pd_op.reshape: (1x12x44x22xf32) <- (1x12x22x44xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(einsum_204, full_int_array_7) + del einsum_204, full_int_array_7 + + # pd_op.slice: (1x12x43x22xf32) <- (1x12x44x22xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + reshape_81, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del full_int_array_3, full_int_array_8, reshape_81 + + # pd_op.reshape: (1x12x22x43xf32) <- (1x12x43x22xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(slice_11, full_int_array_9) + del full_int_array_9, slice_11 + + # pd_op.index_select: (1x12x22x22xf32) <- (1x12x22x43xf32, 22xi64) + index_select_11 = paddle._C_ops.index_select(reshape_82, arange_2, 3) + del arange_2, reshape_82 + + # pd_op.add: (22x1x12x64xf32) <- (22x1x12x64xf32, 12x64xf32) + add_102 = paddle._C_ops.add(reshape_77, parameter_10) + del parameter_10, reshape_77 + + # builtin.combine: ([22x1x12x64xf32, 2x12x64xf32]) <- (22x1x12x64xf32, 2x12x64xf32) + combine_70 = [add_102, parameter_8] + del add_102, parameter_8 + + # pd_op.einsum: (22x1x12x2xf32, [0xf32, 0xf32], [22x1x12x64xf32, 2x12x64xf32]) <- ([22x1x12x64xf32, 2x12x64xf32]) + einsum_207, einsum_208, einsum_209 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_70, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_70 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_276, + split_277, + ) = einsum_208 + del einsum_208 + + # builtin.split: (22x1x12x64xf32, 2x12x64xf32) <- ([22x1x12x64xf32, 2x12x64xf32]) + ( + split_278, + split_279, + ) = einsum_209 + del einsum_209 + + # builtin.combine: ([22x22x1x2xf32, 22x1x12x2xf32]) <- (22x22x1x2xf32, 22x1x12x2xf32) + combine_71 = [cast_5, einsum_207] + del cast_5, einsum_207 + + # pd_op.einsum: (1x12x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x12x2xf32]) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + einsum_210, einsum_211, einsum_212 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_71, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_71 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_280, + split_281, + ) = einsum_211 + del einsum_211 + + # builtin.split: (22x22x1x2xf32, 22x1x12x2xf32) <- ([22x22x1x2xf32, 22x1x12x2xf32]) + ( + split_282, + split_283, + ) = einsum_212 + del einsum_212 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_103 = paddle._C_ops.add(einsum_201, index_select_11) + del einsum_201, index_select_11 + + # pd_op.add: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x12x22x22xf32) + add_104 = paddle._C_ops.add(add_103, einsum_210) + del add_103, einsum_210 + + # pd_op.scale: (1x12x22x22xf32) <- (1x12x22x22xf32, 1xf32) + scale_15 = paddle._C_ops.scale(add_104, full_16, float("0"), True) + del add_104, full_16 + + # pd_op.subtract: (1x12x22x22xf32) <- (1x12x22x22xf32, 1x1x22x22xf32) + subtract_11 = paddle._C_ops.subtract(scale_15, scale_4) + del scale_15, scale_4 + + # pd_op.softmax: (1x12x22x22xf32) <- (1x12x22x22xf32) + softmax_11 = paddle._C_ops.softmax(subtract_11, 3) + del subtract_11 + + # pd_op.dropout: (1x12x22x22xf32, 1x12x22x22xui8) <- (1x12x22x22xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # builtin.combine: ([1x12x22x22xf32, 22x1x12x64xf32]) <- (1x12x22x22xf32, 22x1x12x64xf32) + combine_72 = [dropout_92, reshape_79] + del dropout_92, reshape_79 + + # pd_op.einsum: (22x1x12x64xf32, [0xf32, 0xf32], [1x12x22x22xf32, 22x1x12x64xf32]) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + einsum_213, einsum_214, einsum_215 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_72, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_72 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_284, + split_285, + ) = einsum_214 + del einsum_214 + + # builtin.split: (1x12x22x22xf32, 22x1x12x64xf32) <- ([1x12x22x22xf32, 22x1x12x64xf32]) + ( + split_286, + split_287, + ) = einsum_215 + del einsum_215 + + # pd_op.reshape: (22x1x768xf32) <- (22x1x12x64xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(einsum_213, full_int_array_10) + del einsum_213, full_int_array_10 + + # builtin.combine: ([22x1x768xf32, 768x768xf32]) <- (22x1x768xf32, 768x768xf32) + combine_73 = [reshape_83, parameter_13] + del parameter_13, reshape_83 + + # pd_op.einsum: (22x1x768xf32, [0xf32, 0xf32], [22x1x768xf32, 768x768xf32]) <- ([22x1x768xf32, 768x768xf32]) + einsum_216, einsum_217, einsum_218 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_73, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_73 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_288, + split_289, + ) = einsum_217 + del einsum_217 + + # builtin.split: (22x1x768xf32, 768x768xf32) <- ([22x1x768xf32, 768x768xf32]) + ( + split_290, + split_291, + ) = einsum_218 + del einsum_218 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_216, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_216 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_105 = paddle._C_ops.add(dropout_94, layer_norm_63) + del dropout_94, layer_norm_63 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_105, parameter_7, parameter_6, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_105, parameter_6, parameter_7 + + # pd_op.matmul: (22x1x3072xf32) <- (22x1x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_66, parameter_3, False, False) + del parameter_3 + + # pd_op.add: (22x1x3072xf32) <- (22x1x3072xf32, 3072xf32) + add_106 = paddle._C_ops.add(matmul_70, parameter_2) + del matmul_70, parameter_2 + + # pd_op.gelu: (22x1x3072xf32) <- (22x1x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_106, False) + del add_106 + + # pd_op.dropout: (22x1x3072xf32, 22x1x3072xui8) <- (22x1x3072xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_11 + + # pd_op.matmul: (22x1x768xf32) <- (22x1x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(dropout_96, parameter_1, False, False) + del dropout_96, parameter_1 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 768xf32) + add_107 = paddle._C_ops.add(matmul_71, parameter_0) + del matmul_71, parameter_0 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_107, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_107 + + # pd_op.add: (22x1x768xf32) <- (22x1x768xf32, 22x1x768xf32) + add_108 = paddle._C_ops.add(dropout_98, layer_norm_66) + del dropout_98, layer_norm_66 + + # pd_op.layer_norm: (22x1x768xf32, 22x1xf32, 22x1xf32) <- (22x1x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_108, parameter_5, parameter_4, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_108, parameter_4, parameter_5 + + # pd_op.dropout: (22x1x768xf32, 22x1x768xui8) <- (22x1x768xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_69, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_3, layer_norm_69 + + # pd_op.transpose: (1x22x768xf32) <- (22x1x768xf32) + transpose_0 = paddle._C_ops.transpose(dropout_100, [1, 0, 2]) + del dropout_100 + + return transpose_0 diff --git a/paddle_samples/PaddleNLP/xlnet-base-cased/weight_meta.py b/paddle_samples/PaddleNLP/xlnet-base-cased/weight_meta.py new file mode 100644 index 000000000..cd5f57ddb --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-base-cased/weight_meta.py @@ -0,0 +1,2048 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.107443") + max_val = float("0.0964704") + mean = float("1.56014e-05") + std = float("0.0199908") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101696") + max_val = float("0.0937056") + mean = float("-5.01245e-06") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0700854") + max_val = float("0.0698896") + mean = float("0.000221964") + std = float("0.0206341") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0657833") + max_val = float("0.0554211") + mean = float("-0.000644022") + std = float("0.0201863") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0583059") + max_val = float("0.0721462") + mean = float("-0.000222992") + std = float("0.02004") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0540794") + max_val = float("0.0620946") + mean = float("0.000534411") + std = float("0.0202118") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100007") + max_val = float("0.0982943") + mean = float("-5.16948e-05") + std = float("0.0200102") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102099") + max_val = float("0.0956306") + mean = float("-9.37182e-06") + std = float("0.0199744") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.097491") + max_val = float("0.0942033") + mean = float("-1.36115e-06") + std = float("0.0199895") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0922146") + max_val = float("0.0997002") + mean = float("-2.24828e-05") + std = float("0.0200103") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0922014") + max_val = float("0.0950936") + mean = float("-1.00351e-05") + std = float("0.0200077") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.114515") + max_val = float("0.102741") + mean = float("1.28552e-05") + std = float("0.0199976") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.103737") + max_val = float("0.0964233") + mean = float("-3.63885e-05") + std = float("0.0199944") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0650684") + max_val = float("0.0546684") + mean = float("0.000543553") + std = float("0.0197757") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0639014") + max_val = float("0.0613068") + mean = float("0.000632318") + std = float("0.0199362") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0533739") + max_val = float("0.0662696") + mean = float("0.000393619") + std = float("0.0197884") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0806417") + max_val = float("0.0568907") + mean = float("1.81441e-05") + std = float("0.0199952") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0925241") + max_val = float("0.0936963") + mean = float("1.56955e-05") + std = float("0.0199829") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0896455") + max_val = float("0.0957685") + mean = float("-2.93209e-05") + std = float("0.0200303") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100002") + max_val = float("0.0897565") + mean = float("-3.3829e-05") + std = float("0.0199949") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.087256") + max_val = float("0.0921711") + mean = float("-8.63745e-06") + std = float("0.0199731") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0901312") + max_val = float("0.0977029") + mean = float("1.4295e-05") + std = float("0.0199949") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0948151") + max_val = float("0.0944873") + mean = float("-1.0718e-06") + std = float("0.0200009") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0973458") + max_val = float("0.101163") + mean = float("-7.36939e-08") + std = float("0.0199702") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.06053") + max_val = float("0.0605476") + mean = float("0.000147032") + std = float("0.0192807") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0577485") + max_val = float("0.0631016") + mean = float("0.000978126") + std = float("0.0200149") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0632318") + max_val = float("0.0590489") + mean = float("-0.000117242") + std = float("0.0197921") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.060267") + max_val = float("0.0617522") + mean = float("-0.000553793") + std = float("0.0198732") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0949403") + max_val = float("0.0921171") + mean = float("3.01454e-05") + std = float("0.0200084") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0943445") + max_val = float("0.0932839") + mean = float("-1.06599e-05") + std = float("0.0199709") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0926475") + max_val = float("0.0971318") + mean = float("-2.8691e-05") + std = float("0.0199919") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0970141") + max_val = float("0.0993287") + mean = float("2.19616e-05") + std = float("0.0200119") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.104719") + max_val = float("0.0961093") + mean = float("4.22517e-05") + std = float("0.0199673") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.100468") + max_val = float("0.0950249") + mean = float("-4.33319e-07") + std = float("0.0199887") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0980237") + max_val = float("0.0971189") + mean = float("3.46093e-05") + std = float("0.0200024") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0592654") + max_val = float("0.067405") + mean = float("4.66489e-05") + std = float("0.0200963") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0716498") + max_val = float("0.064698") + mean = float("0.000369003") + std = float("0.0198631") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0665162") + max_val = float("0.0689733") + mean = float("0.00104902") + std = float("0.0202639") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0621085") + max_val = float("0.0697879") + mean = float("0.000479245") + std = float("0.0200544") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.103067") + max_val = float("0.095989") + mean = float("1.96233e-05") + std = float("0.0199981") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0913213") + max_val = float("0.0886865") + mean = float("-1.91935e-05") + std = float("0.0200027") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.105297") + max_val = float("0.0904919") + mean = float("-4.00913e-05") + std = float("0.020032") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0922652") + max_val = float("0.10427") + mean = float("-9.49519e-06") + std = float("0.0200073") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0972453") + max_val = float("0.0973256") + mean = float("-9.09023e-06") + std = float("0.0199783") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.106309") + max_val = float("0.105607") + mean = float("1.10291e-05") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0975573") + max_val = float("0.0966081") + mean = float("-1.16121e-05") + std = float("0.0200024") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0568817") + max_val = float("0.0863073") + mean = float("-1.90975e-05") + std = float("0.0193294") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0652397") + max_val = float("0.0550323") + mean = float("-0.000326906") + std = float("0.0202795") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0611418") + max_val = float("0.0689744") + mean = float("0.000107098") + std = float("0.0206267") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0615758") + max_val = float("0.056214") + mean = float("-0.000222315") + std = float("0.0196349") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.103743") + max_val = float("0.0985834") + mean = float("7.308e-06") + std = float("0.0199702") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0918515") + max_val = float("0.0900846") + mean = float("-5.87464e-05") + std = float("0.0199767") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0912084") + max_val = float("0.0985826") + mean = float("-3.34139e-06") + std = float("0.0200063") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0894189") + max_val = float("0.0908116") + mean = float("2.44833e-05") + std = float("0.0199879") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0945442") + max_val = float("0.094868") + mean = float("-1.32895e-05") + std = float("0.019989") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.100435") + max_val = float("0.0962955") + mean = float("9.07234e-06") + std = float("0.0199853") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0965716") + max_val = float("0.0993747") + mean = float("-1.30958e-05") + std = float("0.0200061") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0606395") + max_val = float("0.0643096") + mean = float("0.000744228") + std = float("0.0200752") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0645655") + max_val = float("0.060837") + mean = float("-0.00195379") + std = float("0.0194921") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0606664") + max_val = float("0.057889") + mean = float("0.00184684") + std = float("0.0198158") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.065119") + max_val = float("0.062255") + mean = float("0.000553101") + std = float("0.0194296") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0937386") + max_val = float("0.100026") + mean = float("2.03894e-05") + std = float("0.0199983") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0962021") + max_val = float("0.0924932") + mean = float("-2.08112e-05") + std = float("0.0199941") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100484") + max_val = float("0.0937384") + mean = float("7.2557e-06") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0947122") + max_val = float("0.0929743") + mean = float("-4.40165e-06") + std = float("0.0200234") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0993096") + max_val = float("0.100981") + mean = float("-1.43578e-05") + std = float("0.0199956") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.105355") + max_val = float("0.0954512") + mean = float("-7.89531e-06") + std = float("0.019986") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0976812") + max_val = float("0.0994407") + mean = float("-2.10747e-05") + std = float("0.0200068") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0729831") + max_val = float("0.0634138") + mean = float("-3.56534e-05") + std = float("0.0201453") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0880961") + max_val = float("0.0590779") + mean = float("0.00160808") + std = float("0.02059") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0665995") + max_val = float("0.0610122") + mean = float("-0.000392904") + std = float("0.0201161") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0674377") + max_val = float("0.0548331") + mean = float("9.5121e-05") + std = float("0.0202969") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.106774") + max_val = float("0.0950007") + mean = float("-3.00636e-05") + std = float("0.0200442") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.103048") + max_val = float("0.0974949") + mean = float("-6.42821e-06") + std = float("0.0199987") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101272") + max_val = float("0.0955136") + mean = float("1.79861e-05") + std = float("0.0200075") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916424") + max_val = float("0.101734") + mean = float("4.76916e-07") + std = float("0.0199606") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0961003") + max_val = float("0.0940374") + mean = float("-6.02494e-06") + std = float("0.0199904") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0973096") + max_val = float("0.0955032") + mean = float("6.05686e-06") + std = float("0.0200104") + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.104251") + max_val = float("0.0985613") + mean = float("-4.24987e-06") + std = float("0.019993") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0781129") + max_val = float("0.0686083") + mean = float("-0.000277737") + std = float("0.019889") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0549032") + max_val = float("0.0549252") + mean = float("0.000373617") + std = float("0.0193584") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0671851") + max_val = float("0.0564571") + mean = float("-0.000553835") + std = float("0.0207093") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0684723") + max_val = float("0.0828474") + mean = float("0.000446164") + std = float("0.020192") + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.093358") + max_val = float("0.0928076") + mean = float("-2.09587e-05") + std = float("0.0199891") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0949628") + max_val = float("0.0958047") + mean = float("-2.08746e-05") + std = float("0.0200089") + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.094868") + max_val = float("0.0951138") + mean = float("1.27738e-05") + std = float("0.0199868") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0953018") + max_val = float("0.0948122") + mean = float("5.91242e-06") + std = float("0.0199676") + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0918979") + max_val = float("0.0891179") + mean = float("-3.84901e-05") + std = float("0.019996") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0960438") + max_val = float("0.102767") + mean = float("-6.15459e-06") + std = float("0.0200163") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0998533") + max_val = float("0.100147") + mean = float("1.70663e-06") + std = float("0.0200142") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0589635") + max_val = float("0.0621262") + mean = float("0.00104112") + std = float("0.0198215") + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0600285") + max_val = float("0.0777009") + mean = float("-0.000173019") + std = float("0.0207801") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0604432") + max_val = float("0.0553187") + mean = float("-0.000111711") + std = float("0.0195423") + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.056655") + max_val = float("0.0710528") + mean = float("-0.000181629") + std = float("0.0202749") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0920498") + max_val = float("0.0956225") + mean = float("-3.74604e-05") + std = float("0.0199711") + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0965613") + max_val = float("0.0990465") + mean = float("2.11627e-05") + std = float("0.0199962") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0878856") + max_val = float("0.089544") + mean = float("-3.45358e-05") + std = float("0.0200235") + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0932969") + max_val = float("0.0873382") + mean = float("1.34506e-05") + std = float("0.019995") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0937826") + max_val = float("0.0977121") + mean = float("-3.7703e-05") + std = float("0.0200068") + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0980782") + max_val = float("0.0965137") + mean = float("2.04605e-06") + std = float("0.0200081") + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0943608") + max_val = float("0.0971133") + mean = float("-6.8108e-06") + std = float("0.0200112") + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0611639") + max_val = float("0.0706575") + mean = float("-0.000616581") + std = float("0.0195134") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0601015") + max_val = float("0.0596957") + mean = float("-0.00045851") + std = float("0.0206736") + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0710343") + max_val = float("0.0623504") + mean = float("0.000134263") + std = float("0.020837") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0560268") + max_val = float("0.0666249") + mean = float("0.000222676") + std = float("0.0204334") + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102647") + max_val = float("0.0960256") + mean = float("-1.14217e-05") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0977201") + max_val = float("0.0934928") + mean = float("-7.01381e-05") + std = float("0.0199996") + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0908481") + max_val = float("0.0940214") + mean = float("2.29566e-05") + std = float("0.0199939") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0902858") + max_val = float("0.102385") + mean = float("-1.86941e-06") + std = float("0.0200058") + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0961338") + max_val = float("0.104653") + mean = float("2.49637e-05") + std = float("0.0200507") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.101178") + max_val = float("0.0997435") + mean = float("-1.84101e-06") + std = float("0.0199933") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0986081") + max_val = float("0.0935717") + mean = float("-7.97306e-06") + std = float("0.0199965") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0756449") + max_val = float("0.0717684") + mean = float("0.000543476") + std = float("0.0199753") + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0529804") + max_val = float("0.0627784") + mean = float("0.000439584") + std = float("0.0196506") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0617306") + max_val = float("0.0535067") + mean = float("-0.000375348") + std = float("0.0199388") + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0633299") + max_val = float("0.0582609") + mean = float("0.000747421") + std = float("0.0194937") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.106269") + max_val = float("0.0909546") + mean = float("2.20117e-05") + std = float("0.0199926") + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101218") + max_val = float("0.0890308") + mean = float("-3.23903e-05") + std = float("0.0199803") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0932448") + max_val = float("0.0936962") + mean = float("1.98542e-05") + std = float("0.0200273") + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0983727") + max_val = float("0.0921061") + mean = float("3.60788e-06") + std = float("0.020025") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0954569") + max_val = float("0.0943231") + mean = float("-1.62991e-05") + std = float("0.0200094") + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0996068") + max_val = float("0.102469") + mean = float("1.54505e-05") + std = float("0.0200034") + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0980855") + max_val = float("0.104687") + mean = float("-1.86281e-05") + std = float("0.0199991") + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2, 12, 64] + dtype = "float32" + min_val = float("-0.0611005") + max_val = float("0.0627168") + mean = float("0.000420174") + std = float("0.0202319") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0615742") + max_val = float("0.0505877") + mean = float("-0.000225133") + std = float("0.0195975") + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0751916") + max_val = float("0.0657023") + mean = float("0.00057308") + std = float("0.0197271") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [12, 64] + dtype = "float32" + min_val = float("-0.0620252") + max_val = float("0.0779363") + mean = float("-0.000147361") + std = float("0.0197645") + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.10821") + max_val = float("0.0981826") + mean = float("-2.55125e-05") + std = float("0.0199975") + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0960096") + max_val = float("0.107631") + mean = float("-2.79243e-06") + std = float("0.0200059") + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100199") + max_val = float("0.0976366") + mean = float("4.94114e-05") + std = float("0.0200341") + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0939321") + max_val = float("0.0938979") + mean = float("3.61011e-06") + std = float("0.0200137") + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.093686") + max_val = float("0.0974261") + mean = float("-2.64717e-05") + std = float("0.020049") + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [32000, 768] + dtype = "float32" + min_val = float("-0.104007") + max_val = float("0.108997") + mean = float("-6.62682e-06") + std = float("0.02") + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [1, 1, 768] + dtype = "float32" + min_val = float("-0.0602759") + max_val = float("0.0636703") + mean = float("0.00026458") + std = float("0.0204492") + data = None diff --git a/paddle_samples/PaddleNLP/xlnet-large-cased/graph_net.json b/paddle_samples/PaddleNLP/xlnet-large-cased/graph_net.json new file mode 100644 index 000000000..04fb8d8f3 --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-large-cased/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "xlnet-large-cased", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/xlnet-large-cased/input_meta.py b/paddle_samples/PaddleNLP/xlnet-large-cased/input_meta.py new file mode 100644 index 000000000..feae33c5c --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-large-cased/input_meta.py @@ -0,0 +1,42 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 22] + dtype = "int64" + data = [ + 17, + 11368, + 19, + 94, + 304, + 27, + 2656, + 9, + 35, + 569, + 1899, + 75, + 392, + 1243, + 2626, + 21, + 58, + 4797, + 23, + 9, + 4, + 3, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 22] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 22] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] diff --git a/paddle_samples/PaddleNLP/xlnet-large-cased/model.py b/paddle_samples/PaddleNLP/xlnet-large-cased/model.py new file mode 100644 index 000000000..35160f7f8 --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-large-cased/model.py @@ -0,0 +1,8389 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + parameter_305, + parameter_306, + parameter_307, + parameter_308, + parameter_309, + parameter_310, + parameter_311, + parameter_312, + parameter_313, + parameter_314, + parameter_315, + parameter_316, + parameter_317, + parameter_318, + parameter_319, + parameter_320, + parameter_321, + parameter_322, + parameter_323, + parameter_324, + parameter_325, + parameter_326, + parameter_327, + parameter_328, + parameter_329, + parameter_330, + parameter_331, + parameter_332, + parameter_333, + parameter_334, + parameter_335, + parameter_336, + parameter_337, + parameter_338, + parameter_339, + parameter_340, + parameter_341, + parameter_342, + parameter_343, + parameter_344, + parameter_345, + parameter_346, + parameter_347, + parameter_348, + parameter_349, + parameter_350, + parameter_351, + parameter_352, + parameter_353, + parameter_354, + parameter_355, + parameter_356, + parameter_357, + parameter_358, + parameter_359, + parameter_360, + parameter_361, + parameter_362, + parameter_363, + parameter_364, + parameter_365, + parameter_366, + parameter_367, + parameter_368, + parameter_369, + parameter_370, + parameter_371, + parameter_372, + parameter_373, + parameter_374, + parameter_375, + parameter_376, + parameter_377, + parameter_378, + parameter_379, + parameter_380, + parameter_381, + parameter_382, + parameter_383, + parameter_384, + parameter_385, + parameter_386, + parameter_387, + parameter_388, + parameter_389, + parameter_390, + parameter_391, + parameter_392, + parameter_393, + parameter_394, + parameter_395, + parameter_396, + parameter_397, + parameter_398, + parameter_399, + parameter_400, + parameter_401, + parameter_402, + parameter_403, + parameter_404, + parameter_405, + parameter_406, + parameter_407, + parameter_408, + parameter_409, + data_0, + data_1, + data_2, + ): + # pd_op.transpose: (22x1xi64) <- (1x22xi64) + transpose_1 = paddle._C_ops.transpose(data_0, [1, 0]) + del data_0 + + # pd_op.transpose: (22x1xi64) <- (1x22xi64) + transpose_2 = paddle._C_ops.transpose(data_1, [1, 0]) + del data_1 + + # pd_op.transpose: (22x1xi64) <- (1x22xi64) + transpose_3 = paddle._C_ops.transpose(data_2, [1, 0]) + del data_2 + + # pd_op.cast: (22x1xf32) <- (22x1xi64) + cast_0 = paddle._C_ops.cast(transpose_3, paddle.float32) + del transpose_3 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (22x1xf32) <- (22x1xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.unsqueeze: (1x22x1xf32) <- (22x1xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.unsqueeze: (1x22x1x1xf32) <- (1x22x1xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.greater_than: (1x22x1x1xb) <- (1x22x1x1xf32, xf32) + greater_than_0 = paddle._C_ops.greater_than(unsqueeze_1, full_1) + del unsqueeze_1 + + # pd_op.cast: (1x22x1x1xf32) <- (1x22x1x1xb) + cast_1 = paddle._C_ops.cast(greater_than_0, paddle.float32) + del greater_than_0 + + # pd_op.full: (22xf32) <- () + full_2 = paddle._C_ops.full( + [22], float("1"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.diag: (22x22xf32) <- (22xf32) + diag_0 = paddle._C_ops.diag(full_2, 0, float("0")) + del full_2 + + # pd_op.scale: (22x22xf32) <- (22x22xf32, 1xf32) + scale_1 = paddle._C_ops.scale(diag_0, full_0, float("0"), True) + del diag_0, full_0 + + # pd_op.cast: (22x22xf32) <- (22x22xf32) + cast_2 = paddle._C_ops.cast(scale_1, paddle.float32) + del scale_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [2, 3] + + # pd_op.unsqueeze: (22x22x1x1xf32) <- (22x22xf32, 2xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(cast_2, full_int_array_2) + del cast_2, full_int_array_2 + + # pd_op.add: (22x22x1x1xf32) <- (1x22x1x1xf32, 22x22x1x1xf32) + add_0 = paddle._C_ops.add(cast_1, unsqueeze_2) + del cast_1, unsqueeze_2 + + # pd_op.greater_than: (22x22x1x1xb) <- (22x22x1x1xf32, xf32) + greater_than_1 = paddle._C_ops.greater_than(add_0, full_1) + del add_0, full_1 + + # pd_op.cast: (22x22x1x1xf32) <- (22x22x1x1xb) + cast_3 = paddle._C_ops.cast(greater_than_1, paddle.float32) + del greater_than_1 + + # pd_op.embedding: (22x1x1024xf32) <- (22x1xi64, 32000x1024xf32) + embedding_0 = paddle._C_ops.embedding(transpose_1, parameter_408, -1, False) + del parameter_408, transpose_1 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [1] + + # pd_op.unsqueeze: (22x1x1xi64) <- (22x1xi64, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_3) + + # pd_op.unsqueeze: (1x22x1xi64) <- (22x1xi64, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_2, full_int_array_0) + del full_int_array_0, transpose_2 + + # pd_op.not_equal: (22x22x1xb) <- (22x1x1xi64, 1x22x1xi64) + not_equal_0 = paddle._C_ops.not_equal(unsqueeze_3, unsqueeze_4) + del unsqueeze_3, unsqueeze_4 + + # pd_op.cast: (22x22x1xi64) <- (22x22x1xb) + cast_4 = paddle._C_ops.cast(not_equal_0, paddle.int64) + del not_equal_0 + + # pd_op.full: (1xi32) <- () + full_4 = paddle._C_ops.full( + [1], float("2"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.one_hot: (22x22x1x2xf32) <- (22x22x1xi64, 1xi32) + one_hot_0 = paddle._C_ops.one_hot( + cast_4 % paddle.cast(full_4, cast_4.dtype), full_4 + ) + del cast_4, full_4 + + # pd_op.cast: (22x22x1x2xf32) <- (22x22x1x2xf32) + cast_5 = paddle._C_ops.cast(one_hot_0, paddle.float32) + del one_hot_0 + + # pd_op.full: (1xf64) <- () + full_5 = paddle._C_ops.full( + [1], float("0"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_6 = paddle._C_ops.full( + [1], float("1024"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_7 = paddle._C_ops.full( + [1], float("2"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (512xf32) <- (1xf64, 1xf64, 1xf64) + arange_0 = paddle.arange(full_5, full_6, full_7, dtype="float32") + del full_6, full_7 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.000976562"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (512xf32) <- (512xf32, 1xf32) + scale_2 = paddle._C_ops.scale(arange_0, full_8, float("0"), True) + del arange_0, full_8 + + # pd_op.full: (512xf32) <- () + full_9 = paddle._C_ops.full( + [512], + float("10000"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.elementwise_pow: (512xf32) <- (512xf32, 512xf32) + elementwise_pow_0 = paddle._C_ops.elementwise_pow(full_9, scale_2) + del full_9, scale_2 + + # pd_op.full: (512xf32) <- () + full_10 = paddle._C_ops.full( + [512], + float("1"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.divide: (512xf32) <- (512xf32, 512xf32) + divide_0 = paddle._C_ops.divide(full_10, elementwise_pow_0) + del elementwise_pow_0, full_10 + + # pd_op.full: (1xf64) <- () + full_11 = paddle._C_ops.full( + [1], float("22"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_12 = paddle._C_ops.full( + [1], float("-22"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_13 = paddle._C_ops.full( + [1], float("-1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (44xf32) <- (1xf64, 1xf64, 1xf64) + arange_1 = paddle.arange(full_11, full_12, full_13, dtype="float32") + del full_12, full_13 + + # builtin.combine: ([44xf32, 512xf32]) <- (44xf32, 512xf32) + combine_0 = [arange_1, divide_0] + del arange_1, divide_0 + + # pd_op.einsum: (44x512xf32, [0xf32, 0xf32], [44xf32, 512xf32]) <- ([44xf32, 512xf32]) + einsum_0, einsum_1, einsum_2 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_0, "i,d->id"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_0 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_0, + split_1, + ) = einsum_1 + del einsum_1 + + # builtin.split: (44xf32, 512xf32) <- ([44xf32, 512xf32]) + ( + split_2, + split_3, + ) = einsum_2 + del einsum_2 + + # pd_op.sin: (44x512xf32) <- (44x512xf32) + sin_0 = paddle._C_ops.sin(einsum_0) + + # pd_op.cos: (44x512xf32) <- (44x512xf32) + cos_0 = paddle._C_ops.cos(einsum_0) + del einsum_0 + + # pd_op.full: (1xi32) <- () + full_14 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([44x512xf32, 44x512xf32]) <- (44x512xf32, 44x512xf32) + combine_1 = [sin_0, cos_0] + del cos_0, sin_0 + + # pd_op.concat: (44x1024xf32) <- ([44x512xf32, 44x512xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_1, full_14) + del combine_1, full_14 + + # pd_op.unsqueeze: (44x1x1024xf32) <- (44x1024xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(concat_0, full_int_array_3) + del concat_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 1, -1] + + # pd_op.expand: (44x1x1024xf32) <- (44x1x1024xf32, 3xi64) + expand_0 = paddle._C_ops.expand(unsqueeze_5, full_int_array_4) + del full_int_array_4, unsqueeze_5 + + # pd_op.dropout: (44x1x1024xf32, 44x1x1024xui8) <- (44x1x1024xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + expand_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del expand_0 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_407, False, False) + del parameter_407 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_5 = [22, 1, 16, 64] + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_0, full_int_array_5) + del matmul_0 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_406, False, False) + del parameter_406 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_1, full_int_array_5) + del matmul_1 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_405, False, False) + del parameter_405 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_2, full_int_array_5) + del matmul_2 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_3 = paddle._C_ops.matmul(dropout_2, parameter_403, False, False) + del parameter_403 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_6 = [44, -1, 16, 64] + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(matmul_3, full_int_array_6) + del matmul_3 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_1 = paddle._C_ops.add(reshape_0, parameter_400) + del parameter_400 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_2 = [add_1, reshape_1] + del add_1, reshape_1 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_3, einsum_4, einsum_5 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_2, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_2 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_4, + split_5, + ) = einsum_4 + del einsum_4 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_6, + split_7, + ) = einsum_5 + del einsum_5 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_2 = paddle._C_ops.add(reshape_0, parameter_402) + del parameter_402 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_3 = [add_2, reshape_3] + del add_2, reshape_3 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_6, einsum_7, einsum_8 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_3, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_3 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_8, + split_9, + ) = einsum_7 + del einsum_7 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_10, + split_11, + ) = einsum_8 + del einsum_8 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_7 = [1, 16, 44, 22] + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(einsum_6, full_int_array_7) + del einsum_6 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [2147483647] + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + reshape_4, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_4 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, 16, 22, 43] + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(slice_0, full_int_array_9) + del slice_0 + + # pd_op.full: (1xf64) <- () + full_15 = paddle._C_ops.full( + [1], float("1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (22xi64) <- (1xf64, 1xf64, 1xf64) + arange_2 = paddle.arange(full_5, full_11, full_15, dtype="int64") + del full_11, full_15, full_5 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_0 = paddle._C_ops.index_select(reshape_5, arange_2, 3) + del reshape_5 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_3 = paddle._C_ops.add(reshape_0, parameter_401) + del parameter_401, reshape_0 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_4 = [add_3, parameter_399] + del add_3, parameter_399 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_9, einsum_10, einsum_11 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_4, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_4 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_12, + split_13, + ) = einsum_10 + del einsum_10 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_14, + split_15, + ) = einsum_11 + del einsum_11 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_5 = [cast_5, einsum_9] + del einsum_9 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_12, einsum_13, einsum_14 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_5, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_5 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_16, + split_17, + ) = einsum_13 + del einsum_13 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_18, + split_19, + ) = einsum_14 + del einsum_14 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_4 = paddle._C_ops.add(einsum_3, index_select_0) + del einsum_3, index_select_0 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_5 = paddle._C_ops.add(add_4, einsum_12) + del add_4, einsum_12 + + # pd_op.full: (1xf32) <- () + full_16 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_3 = paddle._C_ops.scale(add_5, full_16, float("0"), True) + del add_5 + + # pd_op.transpose: (1x1x22x22xf32) <- (22x22x1x1xf32) + transpose_4 = paddle._C_ops.transpose(cast_3, [2, 3, 0, 1]) + del cast_3 + + # pd_op.full: (1xf32) <- () + full_17 = paddle._C_ops.full( + [1], float("1e+30"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x22x22xf32) <- (1x1x22x22xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_4, full_17, float("0"), True) + del full_17, transpose_4 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_0 = paddle._C_ops.subtract(scale_3, scale_4) + del scale_3 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_0 = paddle._C_ops.softmax(subtract_0, 3) + del subtract_0 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_6 = [dropout_4, reshape_2] + del dropout_4, reshape_2 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_15, einsum_16, einsum_17 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_6, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_6 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_20, + split_21, + ) = einsum_16 + del einsum_16 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_22, + split_23, + ) = einsum_17 + del einsum_17 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_10 = [22, 1, 1024] + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_6 = paddle._C_ops.reshape(einsum_15, full_int_array_10) + del einsum_15 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_7 = [reshape_6, parameter_404] + del parameter_404, reshape_6 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_18, einsum_19, einsum_20 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_7, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_7 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_24, + split_25, + ) = einsum_19 + del einsum_19 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_26, + split_27, + ) = einsum_20 + del einsum_20 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_18 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_6 = paddle._C_ops.add(dropout_6, dropout_0) + del dropout_0, dropout_6 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_398, parameter_397, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_6, parameter_397, parameter_398 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_0, parameter_394, False, False) + del parameter_394 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_7 = paddle._C_ops.add(matmul_4, parameter_393) + del matmul_4, parameter_393 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_0 = paddle._C_ops.gelu(add_7, False) + del add_7 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_0 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_5 = paddle._C_ops.matmul(dropout_8, parameter_392, False, False) + del dropout_8, parameter_392 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_8 = paddle._C_ops.add(matmul_5, parameter_391) + del matmul_5, parameter_391 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_8 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_9 = paddle._C_ops.add(dropout_10, layer_norm_0) + del dropout_10, layer_norm_0 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_9, parameter_396, parameter_395, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_9, parameter_395, parameter_396 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_3, parameter_390, False, False) + del parameter_390 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(matmul_6, full_int_array_5) + del matmul_6 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_389, False, False) + del parameter_389 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_7, full_int_array_5) + del matmul_7 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_3, parameter_388, False, False) + del parameter_388 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_8, full_int_array_5) + del matmul_8 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_9 = paddle._C_ops.matmul(dropout_2, parameter_386, False, False) + del parameter_386 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_9, full_int_array_6) + del matmul_9 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_10 = paddle._C_ops.add(reshape_7, parameter_383) + del parameter_383 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_8 = [add_10, reshape_8] + del add_10, reshape_8 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_21, einsum_22, einsum_23 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_8, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_8 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_28, + split_29, + ) = einsum_22 + del einsum_22 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_30, + split_31, + ) = einsum_23 + del einsum_23 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_11 = paddle._C_ops.add(reshape_7, parameter_385) + del parameter_385 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_9 = [add_11, reshape_10] + del add_11, reshape_10 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_24, einsum_25, einsum_26 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_9, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_9 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_32, + split_33, + ) = einsum_25 + del einsum_25 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_34, + split_35, + ) = einsum_26 + del einsum_26 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(einsum_24, full_int_array_7) + del einsum_24 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + reshape_11, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_11 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(slice_1, full_int_array_9) + del slice_1 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_1 = paddle._C_ops.index_select(reshape_12, arange_2, 3) + del reshape_12 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_12 = paddle._C_ops.add(reshape_7, parameter_384) + del parameter_384, reshape_7 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_10 = [add_12, parameter_382] + del add_12, parameter_382 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_27, einsum_28, einsum_29 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_10, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_10 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_36, + split_37, + ) = einsum_28 + del einsum_28 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_38, + split_39, + ) = einsum_29 + del einsum_29 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_11 = [cast_5, einsum_27] + del einsum_27 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_30, einsum_31, einsum_32 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_11, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_11 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_40, + split_41, + ) = einsum_31 + del einsum_31 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_42, + split_43, + ) = einsum_32 + del einsum_32 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_13 = paddle._C_ops.add(einsum_21, index_select_1) + del einsum_21, index_select_1 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_14 = paddle._C_ops.add(add_13, einsum_30) + del add_13, einsum_30 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_5 = paddle._C_ops.scale(add_14, full_16, float("0"), True) + del add_14 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_1 = paddle._C_ops.subtract(scale_5, scale_4) + del scale_5 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_1 = paddle._C_ops.softmax(subtract_1, 3) + del subtract_1 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_12 = [dropout_12, reshape_9] + del dropout_12, reshape_9 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_33, einsum_34, einsum_35 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_12, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_12 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_44, + split_45, + ) = einsum_34 + del einsum_34 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_46, + split_47, + ) = einsum_35 + del einsum_35 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_13 = paddle._C_ops.reshape(einsum_33, full_int_array_10) + del einsum_33 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_13 = [reshape_13, parameter_387] + del parameter_387, reshape_13 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_36, einsum_37, einsum_38 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_13, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_13 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_48, + split_49, + ) = einsum_37 + del einsum_37 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_50, + split_51, + ) = einsum_38 + del einsum_38 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_36, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_36 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_15 = paddle._C_ops.add(dropout_14, layer_norm_3) + del dropout_14, layer_norm_3 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_15, parameter_381, parameter_380, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_15, parameter_380, parameter_381 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_377, False, False) + del parameter_377 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_16 = paddle._C_ops.add(matmul_10, parameter_376) + del matmul_10, parameter_376 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_1 = paddle._C_ops.gelu(add_16, False) + del add_16 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_1 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_11 = paddle._C_ops.matmul(dropout_16, parameter_375, False, False) + del dropout_16, parameter_375 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_17 = paddle._C_ops.add(matmul_11, parameter_374) + del matmul_11, parameter_374 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_17 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_18 = paddle._C_ops.add(dropout_18, layer_norm_6) + del dropout_18, layer_norm_6 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_379, parameter_378, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18, parameter_378, parameter_379 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_9, parameter_373, False, False) + del parameter_373 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_12, full_int_array_5) + del matmul_12 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_13 = paddle._C_ops.matmul(layer_norm_9, parameter_372, False, False) + del parameter_372 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(matmul_13, full_int_array_5) + del matmul_13 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_9, parameter_371, False, False) + del parameter_371 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_14, full_int_array_5) + del matmul_14 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_15 = paddle._C_ops.matmul(dropout_2, parameter_369, False, False) + del parameter_369 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_15, full_int_array_6) + del matmul_15 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_19 = paddle._C_ops.add(reshape_14, parameter_366) + del parameter_366 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_14 = [add_19, reshape_15] + del add_19, reshape_15 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_39, einsum_40, einsum_41 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_14, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_14 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_52, + split_53, + ) = einsum_40 + del einsum_40 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_54, + split_55, + ) = einsum_41 + del einsum_41 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_20 = paddle._C_ops.add(reshape_14, parameter_368) + del parameter_368 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_15 = [add_20, reshape_17] + del add_20, reshape_17 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_42, einsum_43, einsum_44 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_15, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_15 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_56, + split_57, + ) = einsum_43 + del einsum_43 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_58, + split_59, + ) = einsum_44 + del einsum_44 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(einsum_42, full_int_array_7) + del einsum_42 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + reshape_18, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_18 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_19 = paddle._C_ops.reshape(slice_2, full_int_array_9) + del slice_2 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_2 = paddle._C_ops.index_select(reshape_19, arange_2, 3) + del reshape_19 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_21 = paddle._C_ops.add(reshape_14, parameter_367) + del parameter_367, reshape_14 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_16 = [add_21, parameter_365] + del add_21, parameter_365 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_45, einsum_46, einsum_47 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_16, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_16 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_60, + split_61, + ) = einsum_46 + del einsum_46 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_62, + split_63, + ) = einsum_47 + del einsum_47 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_17 = [cast_5, einsum_45] + del einsum_45 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_48, einsum_49, einsum_50 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_17, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_17 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_64, + split_65, + ) = einsum_49 + del einsum_49 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_66, + split_67, + ) = einsum_50 + del einsum_50 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_22 = paddle._C_ops.add(einsum_39, index_select_2) + del einsum_39, index_select_2 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_23 = paddle._C_ops.add(add_22, einsum_48) + del add_22, einsum_48 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_6 = paddle._C_ops.scale(add_23, full_16, float("0"), True) + del add_23 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_2 = paddle._C_ops.subtract(scale_6, scale_4) + del scale_6 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_2 = paddle._C_ops.softmax(subtract_2, 3) + del subtract_2 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_18 = [dropout_20, reshape_16] + del dropout_20, reshape_16 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_51, einsum_52, einsum_53 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_18, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_18 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_68, + split_69, + ) = einsum_52 + del einsum_52 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_70, + split_71, + ) = einsum_53 + del einsum_53 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_20 = paddle._C_ops.reshape(einsum_51, full_int_array_10) + del einsum_51 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_19 = [reshape_20, parameter_370] + del parameter_370, reshape_20 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_54, einsum_55, einsum_56 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_19, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_19 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_72, + split_73, + ) = einsum_55 + del einsum_55 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_74, + split_75, + ) = einsum_56 + del einsum_56 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_54, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_54 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_24 = paddle._C_ops.add(dropout_22, layer_norm_9) + del dropout_22, layer_norm_9 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_24, parameter_364, parameter_363, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_24, parameter_363, parameter_364 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_12, parameter_360, False, False) + del parameter_360 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_25 = paddle._C_ops.add(matmul_16, parameter_359) + del matmul_16, parameter_359 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_2 = paddle._C_ops.gelu(add_25, False) + del add_25 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_2 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_17 = paddle._C_ops.matmul(dropout_24, parameter_358, False, False) + del dropout_24, parameter_358 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_26 = paddle._C_ops.add(matmul_17, parameter_357) + del matmul_17, parameter_357 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_26, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_26 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_27 = paddle._C_ops.add(dropout_26, layer_norm_12) + del dropout_26, layer_norm_12 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_27, parameter_362, parameter_361, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_27, parameter_361, parameter_362 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_15, parameter_356, False, False) + del parameter_356 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_18, full_int_array_5) + del matmul_18 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_15, parameter_355, False, False) + del parameter_355 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_19, full_int_array_5) + del matmul_19 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_20 = paddle._C_ops.matmul(layer_norm_15, parameter_354, False, False) + del parameter_354 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_23 = paddle._C_ops.reshape(matmul_20, full_int_array_5) + del matmul_20 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_21 = paddle._C_ops.matmul(dropout_2, parameter_352, False, False) + del parameter_352 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_21, full_int_array_6) + del matmul_21 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_28 = paddle._C_ops.add(reshape_21, parameter_349) + del parameter_349 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_20 = [add_28, reshape_22] + del add_28, reshape_22 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_57, einsum_58, einsum_59 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_20, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_20 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_76, + split_77, + ) = einsum_58 + del einsum_58 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_78, + split_79, + ) = einsum_59 + del einsum_59 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_29 = paddle._C_ops.add(reshape_21, parameter_351) + del parameter_351 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_21 = [add_29, reshape_24] + del add_29, reshape_24 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_60, einsum_61, einsum_62 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_21, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_21 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_80, + split_81, + ) = einsum_61 + del einsum_61 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_82, + split_83, + ) = einsum_62 + del einsum_62 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(einsum_60, full_int_array_7) + del einsum_60 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + reshape_25, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_25 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(slice_3, full_int_array_9) + del slice_3 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_3 = paddle._C_ops.index_select(reshape_26, arange_2, 3) + del reshape_26 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_30 = paddle._C_ops.add(reshape_21, parameter_350) + del parameter_350, reshape_21 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_22 = [add_30, parameter_348] + del add_30, parameter_348 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_63, einsum_64, einsum_65 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_22, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_22 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_84, + split_85, + ) = einsum_64 + del einsum_64 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_86, + split_87, + ) = einsum_65 + del einsum_65 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_23 = [cast_5, einsum_63] + del einsum_63 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_66, einsum_67, einsum_68 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_23, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_23 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_88, + split_89, + ) = einsum_67 + del einsum_67 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_90, + split_91, + ) = einsum_68 + del einsum_68 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_31 = paddle._C_ops.add(einsum_57, index_select_3) + del einsum_57, index_select_3 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_32 = paddle._C_ops.add(add_31, einsum_66) + del add_31, einsum_66 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_7 = paddle._C_ops.scale(add_32, full_16, float("0"), True) + del add_32 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_3 = paddle._C_ops.subtract(scale_7, scale_4) + del scale_7 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_3 = paddle._C_ops.softmax(subtract_3, 3) + del subtract_3 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_24 = [dropout_28, reshape_23] + del dropout_28, reshape_23 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_69, einsum_70, einsum_71 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_24, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_24 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_92, + split_93, + ) = einsum_70 + del einsum_70 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_94, + split_95, + ) = einsum_71 + del einsum_71 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(einsum_69, full_int_array_10) + del einsum_69 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_25 = [reshape_27, parameter_353] + del parameter_353, reshape_27 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_72, einsum_73, einsum_74 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_25, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_25 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_96, + split_97, + ) = einsum_73 + del einsum_73 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_98, + split_99, + ) = einsum_74 + del einsum_74 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_72, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_72 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_33 = paddle._C_ops.add(dropout_30, layer_norm_15) + del dropout_30, layer_norm_15 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_33, parameter_347, parameter_346, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_33, parameter_346, parameter_347 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_18, parameter_343, False, False) + del parameter_343 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_34 = paddle._C_ops.add(matmul_22, parameter_342) + del matmul_22, parameter_342 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_3 = paddle._C_ops.gelu(add_34, False) + del add_34 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_3 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_23 = paddle._C_ops.matmul(dropout_32, parameter_341, False, False) + del dropout_32, parameter_341 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_35 = paddle._C_ops.add(matmul_23, parameter_340) + del matmul_23, parameter_340 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_35, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_35 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_36 = paddle._C_ops.add(dropout_34, layer_norm_18) + del dropout_34, layer_norm_18 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_36, parameter_345, parameter_344, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_36, parameter_344, parameter_345 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_21, parameter_339, False, False) + del parameter_339 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_24, full_int_array_5) + del matmul_24 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_21, parameter_338, False, False) + del parameter_338 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_25, full_int_array_5) + del matmul_25 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_21, parameter_337, False, False) + del parameter_337 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_26, full_int_array_5) + del matmul_26 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_27 = paddle._C_ops.matmul(dropout_2, parameter_335, False, False) + del parameter_335 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(matmul_27, full_int_array_6) + del matmul_27 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_37 = paddle._C_ops.add(reshape_28, parameter_332) + del parameter_332 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_26 = [add_37, reshape_29] + del add_37, reshape_29 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_75, einsum_76, einsum_77 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_26, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_26 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_100, + split_101, + ) = einsum_76 + del einsum_76 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_102, + split_103, + ) = einsum_77 + del einsum_77 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_38 = paddle._C_ops.add(reshape_28, parameter_334) + del parameter_334 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_27 = [add_38, reshape_31] + del add_38, reshape_31 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_78, einsum_79, einsum_80 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_27, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_27 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_104, + split_105, + ) = einsum_79 + del einsum_79 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_106, + split_107, + ) = einsum_80 + del einsum_80 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(einsum_78, full_int_array_7) + del einsum_78 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + reshape_32, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_32 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(slice_4, full_int_array_9) + del slice_4 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_4 = paddle._C_ops.index_select(reshape_33, arange_2, 3) + del reshape_33 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_39 = paddle._C_ops.add(reshape_28, parameter_333) + del parameter_333, reshape_28 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_28 = [add_39, parameter_331] + del add_39, parameter_331 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_81, einsum_82, einsum_83 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_28, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_28 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_108, + split_109, + ) = einsum_82 + del einsum_82 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_110, + split_111, + ) = einsum_83 + del einsum_83 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_29 = [cast_5, einsum_81] + del einsum_81 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_84, einsum_85, einsum_86 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_29, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_29 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_112, + split_113, + ) = einsum_85 + del einsum_85 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_114, + split_115, + ) = einsum_86 + del einsum_86 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_40 = paddle._C_ops.add(einsum_75, index_select_4) + del einsum_75, index_select_4 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_41 = paddle._C_ops.add(add_40, einsum_84) + del add_40, einsum_84 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_8 = paddle._C_ops.scale(add_41, full_16, float("0"), True) + del add_41 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_4 = paddle._C_ops.subtract(scale_8, scale_4) + del scale_8 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_4 = paddle._C_ops.softmax(subtract_4, 3) + del subtract_4 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_30 = [dropout_36, reshape_30] + del dropout_36, reshape_30 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_87, einsum_88, einsum_89 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_30, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_30 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_116, + split_117, + ) = einsum_88 + del einsum_88 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_118, + split_119, + ) = einsum_89 + del einsum_89 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_34 = paddle._C_ops.reshape(einsum_87, full_int_array_10) + del einsum_87 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_31 = [reshape_34, parameter_336] + del parameter_336, reshape_34 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_90, einsum_91, einsum_92 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_31, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_31 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_120, + split_121, + ) = einsum_91 + del einsum_91 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_122, + split_123, + ) = einsum_92 + del einsum_92 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_90, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_90 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_42 = paddle._C_ops.add(dropout_38, layer_norm_21) + del dropout_38, layer_norm_21 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_330, parameter_329, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_42, parameter_329, parameter_330 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_28 = paddle._C_ops.matmul(layer_norm_24, parameter_326, False, False) + del parameter_326 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_43 = paddle._C_ops.add(matmul_28, parameter_325) + del matmul_28, parameter_325 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_4 = paddle._C_ops.gelu(add_43, False) + del add_43 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_4 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_29 = paddle._C_ops.matmul(dropout_40, parameter_324, False, False) + del dropout_40, parameter_324 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_44 = paddle._C_ops.add(matmul_29, parameter_323) + del matmul_29, parameter_323 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_44, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_44 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_45 = paddle._C_ops.add(dropout_42, layer_norm_24) + del dropout_42, layer_norm_24 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_328, parameter_327, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_45, parameter_327, parameter_328 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_27, parameter_322, False, False) + del parameter_322 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(matmul_30, full_int_array_5) + del matmul_30 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_27, parameter_321, False, False) + del parameter_321 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_31, full_int_array_5) + del matmul_31 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_27, parameter_320, False, False) + del parameter_320 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_32, full_int_array_5) + del matmul_32 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_33 = paddle._C_ops.matmul(dropout_2, parameter_318, False, False) + del parameter_318 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_33, full_int_array_6) + del matmul_33 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_46 = paddle._C_ops.add(reshape_35, parameter_315) + del parameter_315 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_32 = [add_46, reshape_36] + del add_46, reshape_36 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_93, einsum_94, einsum_95 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_32, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_32 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_124, + split_125, + ) = einsum_94 + del einsum_94 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_126, + split_127, + ) = einsum_95 + del einsum_95 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_47 = paddle._C_ops.add(reshape_35, parameter_317) + del parameter_317 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_33 = [add_47, reshape_38] + del add_47, reshape_38 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_96, einsum_97, einsum_98 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_33, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_33 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_128, + split_129, + ) = einsum_97 + del einsum_97 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_130, + split_131, + ) = einsum_98 + del einsum_98 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(einsum_96, full_int_array_7) + del einsum_96 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + reshape_39, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_39 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(slice_5, full_int_array_9) + del slice_5 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_5 = paddle._C_ops.index_select(reshape_40, arange_2, 3) + del reshape_40 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_48 = paddle._C_ops.add(reshape_35, parameter_316) + del parameter_316, reshape_35 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_34 = [add_48, parameter_314] + del add_48, parameter_314 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_99, einsum_100, einsum_101 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_34, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_34 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_132, + split_133, + ) = einsum_100 + del einsum_100 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_134, + split_135, + ) = einsum_101 + del einsum_101 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_35 = [cast_5, einsum_99] + del einsum_99 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_102, einsum_103, einsum_104 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_35, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_35 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_136, + split_137, + ) = einsum_103 + del einsum_103 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_138, + split_139, + ) = einsum_104 + del einsum_104 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_49 = paddle._C_ops.add(einsum_93, index_select_5) + del einsum_93, index_select_5 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_50 = paddle._C_ops.add(add_49, einsum_102) + del add_49, einsum_102 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_9 = paddle._C_ops.scale(add_50, full_16, float("0"), True) + del add_50 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_5 = paddle._C_ops.subtract(scale_9, scale_4) + del scale_9 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_5 = paddle._C_ops.softmax(subtract_5, 3) + del subtract_5 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_36 = [dropout_44, reshape_37] + del dropout_44, reshape_37 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_105, einsum_106, einsum_107 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_36, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_36 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_140, + split_141, + ) = einsum_106 + del einsum_106 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_142, + split_143, + ) = einsum_107 + del einsum_107 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(einsum_105, full_int_array_10) + del einsum_105 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_37 = [reshape_41, parameter_319] + del parameter_319, reshape_41 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_108, einsum_109, einsum_110 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_37, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_37 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_144, + split_145, + ) = einsum_109 + del einsum_109 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_146, + split_147, + ) = einsum_110 + del einsum_110 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_108, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_108 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_51 = paddle._C_ops.add(dropout_46, layer_norm_27) + del dropout_46, layer_norm_27 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_51, parameter_313, parameter_312, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_51, parameter_312, parameter_313 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_30, parameter_309, False, False) + del parameter_309 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_52 = paddle._C_ops.add(matmul_34, parameter_308) + del matmul_34, parameter_308 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_5 = paddle._C_ops.gelu(add_52, False) + del add_52 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_5 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_35 = paddle._C_ops.matmul(dropout_48, parameter_307, False, False) + del dropout_48, parameter_307 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_53 = paddle._C_ops.add(matmul_35, parameter_306) + del matmul_35, parameter_306 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_53, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_53 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_54 = paddle._C_ops.add(dropout_50, layer_norm_30) + del dropout_50, layer_norm_30 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_54, parameter_311, parameter_310, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_54, parameter_310, parameter_311 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_33, parameter_305, False, False) + del parameter_305 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_36, full_int_array_5) + del matmul_36 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_37 = paddle._C_ops.matmul(layer_norm_33, parameter_304, False, False) + del parameter_304 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(matmul_37, full_int_array_5) + del matmul_37 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_33, parameter_303, False, False) + del parameter_303 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_38, full_int_array_5) + del matmul_38 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_39 = paddle._C_ops.matmul(dropout_2, parameter_301, False, False) + del parameter_301 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_39, full_int_array_6) + del matmul_39 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_55 = paddle._C_ops.add(reshape_42, parameter_298) + del parameter_298 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_38 = [add_55, reshape_43] + del add_55, reshape_43 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_111, einsum_112, einsum_113 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_38, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_38 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_148, + split_149, + ) = einsum_112 + del einsum_112 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_150, + split_151, + ) = einsum_113 + del einsum_113 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_56 = paddle._C_ops.add(reshape_42, parameter_300) + del parameter_300 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_39 = [add_56, reshape_45] + del add_56, reshape_45 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_114, einsum_115, einsum_116 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_39, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_39 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_152, + split_153, + ) = einsum_115 + del einsum_115 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_154, + split_155, + ) = einsum_116 + del einsum_116 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(einsum_114, full_int_array_7) + del einsum_114 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + reshape_46, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_46 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(slice_6, full_int_array_9) + del slice_6 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_6 = paddle._C_ops.index_select(reshape_47, arange_2, 3) + del reshape_47 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_57 = paddle._C_ops.add(reshape_42, parameter_299) + del parameter_299, reshape_42 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_40 = [add_57, parameter_297] + del add_57, parameter_297 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_117, einsum_118, einsum_119 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_40, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_40 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_156, + split_157, + ) = einsum_118 + del einsum_118 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_158, + split_159, + ) = einsum_119 + del einsum_119 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_41 = [cast_5, einsum_117] + del einsum_117 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_120, einsum_121, einsum_122 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_41, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_41 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_160, + split_161, + ) = einsum_121 + del einsum_121 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_162, + split_163, + ) = einsum_122 + del einsum_122 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_58 = paddle._C_ops.add(einsum_111, index_select_6) + del einsum_111, index_select_6 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_59 = paddle._C_ops.add(add_58, einsum_120) + del add_58, einsum_120 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_10 = paddle._C_ops.scale(add_59, full_16, float("0"), True) + del add_59 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_6 = paddle._C_ops.subtract(scale_10, scale_4) + del scale_10 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_6 = paddle._C_ops.softmax(subtract_6, 3) + del subtract_6 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_42 = [dropout_52, reshape_44] + del dropout_52, reshape_44 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_123, einsum_124, einsum_125 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_42, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_42 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_164, + split_165, + ) = einsum_124 + del einsum_124 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_166, + split_167, + ) = einsum_125 + del einsum_125 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(einsum_123, full_int_array_10) + del einsum_123 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_43 = [reshape_48, parameter_302] + del parameter_302, reshape_48 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_126, einsum_127, einsum_128 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_43, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_43 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_168, + split_169, + ) = einsum_127 + del einsum_127 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_170, + split_171, + ) = einsum_128 + del einsum_128 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_126, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_126 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_60 = paddle._C_ops.add(dropout_54, layer_norm_33) + del dropout_54, layer_norm_33 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_60, parameter_296, parameter_295, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_60, parameter_295, parameter_296 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_36, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_61 = paddle._C_ops.add(matmul_40, parameter_291) + del matmul_40, parameter_291 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_6 = paddle._C_ops.gelu(add_61, False) + del add_61 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_6 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_41 = paddle._C_ops.matmul(dropout_56, parameter_290, False, False) + del dropout_56, parameter_290 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_62 = paddle._C_ops.add(matmul_41, parameter_289) + del matmul_41, parameter_289 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_62, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_62 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_63 = paddle._C_ops.add(dropout_58, layer_norm_36) + del dropout_58, layer_norm_36 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_63, parameter_294, parameter_293, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_63, parameter_293, parameter_294 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_39, parameter_288, False, False) + del parameter_288 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(matmul_42, full_int_array_5) + del matmul_42 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_39, parameter_287, False, False) + del parameter_287 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(matmul_43, full_int_array_5) + del matmul_43 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_44 = paddle._C_ops.matmul(layer_norm_39, parameter_286, False, False) + del parameter_286 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_51 = paddle._C_ops.reshape(matmul_44, full_int_array_5) + del matmul_44 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_45 = paddle._C_ops.matmul(dropout_2, parameter_284, False, False) + del parameter_284 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(matmul_45, full_int_array_6) + del matmul_45 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_64 = paddle._C_ops.add(reshape_49, parameter_281) + del parameter_281 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_44 = [add_64, reshape_50] + del add_64, reshape_50 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_129, einsum_130, einsum_131 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_44, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_44 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_172, + split_173, + ) = einsum_130 + del einsum_130 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_174, + split_175, + ) = einsum_131 + del einsum_131 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_65 = paddle._C_ops.add(reshape_49, parameter_283) + del parameter_283 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_45 = [add_65, reshape_52] + del add_65, reshape_52 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_132, einsum_133, einsum_134 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_45, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_45 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_176, + split_177, + ) = einsum_133 + del einsum_133 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_178, + split_179, + ) = einsum_134 + del einsum_134 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(einsum_132, full_int_array_7) + del einsum_132 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + reshape_53, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_53 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(slice_7, full_int_array_9) + del slice_7 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_7 = paddle._C_ops.index_select(reshape_54, arange_2, 3) + del reshape_54 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_66 = paddle._C_ops.add(reshape_49, parameter_282) + del parameter_282, reshape_49 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_46 = [add_66, parameter_280] + del add_66, parameter_280 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_135, einsum_136, einsum_137 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_46, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_46 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_180, + split_181, + ) = einsum_136 + del einsum_136 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_182, + split_183, + ) = einsum_137 + del einsum_137 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_47 = [cast_5, einsum_135] + del einsum_135 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_138, einsum_139, einsum_140 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_47, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_47 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_184, + split_185, + ) = einsum_139 + del einsum_139 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_186, + split_187, + ) = einsum_140 + del einsum_140 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_67 = paddle._C_ops.add(einsum_129, index_select_7) + del einsum_129, index_select_7 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_68 = paddle._C_ops.add(add_67, einsum_138) + del add_67, einsum_138 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_11 = paddle._C_ops.scale(add_68, full_16, float("0"), True) + del add_68 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_7 = paddle._C_ops.subtract(scale_11, scale_4) + del scale_11 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_7 = paddle._C_ops.softmax(subtract_7, 3) + del subtract_7 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_48 = [dropout_60, reshape_51] + del dropout_60, reshape_51 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_141, einsum_142, einsum_143 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_48, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_48 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_188, + split_189, + ) = einsum_142 + del einsum_142 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_190, + split_191, + ) = einsum_143 + del einsum_143 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(einsum_141, full_int_array_10) + del einsum_141 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_49 = [reshape_55, parameter_285] + del parameter_285, reshape_55 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_144, einsum_145, einsum_146 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_49, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_49 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_192, + split_193, + ) = einsum_145 + del einsum_145 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_194, + split_195, + ) = einsum_146 + del einsum_146 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_144, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_144 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_69 = paddle._C_ops.add(dropout_62, layer_norm_39) + del dropout_62, layer_norm_39 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_69, parameter_279, parameter_278, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_69, parameter_278, parameter_279 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_42, parameter_275, False, False) + del parameter_275 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_70 = paddle._C_ops.add(matmul_46, parameter_274) + del matmul_46, parameter_274 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_7 = paddle._C_ops.gelu(add_70, False) + del add_70 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_7 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_47 = paddle._C_ops.matmul(dropout_64, parameter_273, False, False) + del dropout_64, parameter_273 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_71 = paddle._C_ops.add(matmul_47, parameter_272) + del matmul_47, parameter_272 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_71, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_71 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_72 = paddle._C_ops.add(dropout_66, layer_norm_42) + del dropout_66, layer_norm_42 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_72, parameter_277, parameter_276, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_72, parameter_276, parameter_277 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_45, parameter_271, False, False) + del parameter_271 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(matmul_48, full_int_array_5) + del matmul_48 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_45, parameter_270, False, False) + del parameter_270 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(matmul_49, full_int_array_5) + del matmul_49 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_45, parameter_269, False, False) + del parameter_269 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(matmul_50, full_int_array_5) + del matmul_50 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_51 = paddle._C_ops.matmul(dropout_2, parameter_267, False, False) + del parameter_267 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_59 = paddle._C_ops.reshape(matmul_51, full_int_array_6) + del matmul_51 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_73 = paddle._C_ops.add(reshape_56, parameter_264) + del parameter_264 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_50 = [add_73, reshape_57] + del add_73, reshape_57 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_147, einsum_148, einsum_149 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_50, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_50 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_196, + split_197, + ) = einsum_148 + del einsum_148 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_198, + split_199, + ) = einsum_149 + del einsum_149 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_74 = paddle._C_ops.add(reshape_56, parameter_266) + del parameter_266 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_51 = [add_74, reshape_59] + del add_74, reshape_59 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_150, einsum_151, einsum_152 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_51, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_51 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_200, + split_201, + ) = einsum_151 + del einsum_151 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_202, + split_203, + ) = einsum_152 + del einsum_152 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(einsum_150, full_int_array_7) + del einsum_150 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + reshape_60, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_60 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(slice_8, full_int_array_9) + del slice_8 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_8 = paddle._C_ops.index_select(reshape_61, arange_2, 3) + del reshape_61 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_75 = paddle._C_ops.add(reshape_56, parameter_265) + del parameter_265, reshape_56 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_52 = [add_75, parameter_263] + del add_75, parameter_263 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_153, einsum_154, einsum_155 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_52, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_52 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_204, + split_205, + ) = einsum_154 + del einsum_154 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_206, + split_207, + ) = einsum_155 + del einsum_155 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_53 = [cast_5, einsum_153] + del einsum_153 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_156, einsum_157, einsum_158 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_53, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_53 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_208, + split_209, + ) = einsum_157 + del einsum_157 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_210, + split_211, + ) = einsum_158 + del einsum_158 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_76 = paddle._C_ops.add(einsum_147, index_select_8) + del einsum_147, index_select_8 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_77 = paddle._C_ops.add(add_76, einsum_156) + del add_76, einsum_156 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_12 = paddle._C_ops.scale(add_77, full_16, float("0"), True) + del add_77 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_8 = paddle._C_ops.subtract(scale_12, scale_4) + del scale_12 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_8 = paddle._C_ops.softmax(subtract_8, 3) + del subtract_8 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_54 = [dropout_68, reshape_58] + del dropout_68, reshape_58 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_159, einsum_160, einsum_161 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_54, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_54 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_212, + split_213, + ) = einsum_160 + del einsum_160 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_214, + split_215, + ) = einsum_161 + del einsum_161 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_62 = paddle._C_ops.reshape(einsum_159, full_int_array_10) + del einsum_159 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_55 = [reshape_62, parameter_268] + del parameter_268, reshape_62 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_162, einsum_163, einsum_164 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_55, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_55 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_216, + split_217, + ) = einsum_163 + del einsum_163 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_218, + split_219, + ) = einsum_164 + del einsum_164 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_162, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_162 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_78 = paddle._C_ops.add(dropout_70, layer_norm_45) + del dropout_70, layer_norm_45 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_78, parameter_262, parameter_261, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_78, parameter_261, parameter_262 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_52 = paddle._C_ops.matmul(layer_norm_48, parameter_258, False, False) + del parameter_258 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_79 = paddle._C_ops.add(matmul_52, parameter_257) + del matmul_52, parameter_257 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_8 = paddle._C_ops.gelu(add_79, False) + del add_79 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_8 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_53 = paddle._C_ops.matmul(dropout_72, parameter_256, False, False) + del dropout_72, parameter_256 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_80 = paddle._C_ops.add(matmul_53, parameter_255) + del matmul_53, parameter_255 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_80, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_80 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_81 = paddle._C_ops.add(dropout_74, layer_norm_48) + del dropout_74, layer_norm_48 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_260, parameter_259, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_81, parameter_259, parameter_260 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_51, parameter_254, False, False) + del parameter_254 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(matmul_54, full_int_array_5) + del matmul_54 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_51, parameter_253, False, False) + del parameter_253 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(matmul_55, full_int_array_5) + del matmul_55 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_51, parameter_252, False, False) + del parameter_252 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(matmul_56, full_int_array_5) + del matmul_56 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_57 = paddle._C_ops.matmul(dropout_2, parameter_250, False, False) + del parameter_250 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(matmul_57, full_int_array_6) + del matmul_57 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_82 = paddle._C_ops.add(reshape_63, parameter_247) + del parameter_247 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_56 = [add_82, reshape_64] + del add_82, reshape_64 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_165, einsum_166, einsum_167 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_56, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_56 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_220, + split_221, + ) = einsum_166 + del einsum_166 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_222, + split_223, + ) = einsum_167 + del einsum_167 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_83 = paddle._C_ops.add(reshape_63, parameter_249) + del parameter_249 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_57 = [add_83, reshape_66] + del add_83, reshape_66 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_168, einsum_169, einsum_170 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_57, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_57 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_224, + split_225, + ) = einsum_169 + del einsum_169 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_226, + split_227, + ) = einsum_170 + del einsum_170 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(einsum_168, full_int_array_7) + del einsum_168 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + reshape_67, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_67 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(slice_9, full_int_array_9) + del slice_9 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_9 = paddle._C_ops.index_select(reshape_68, arange_2, 3) + del reshape_68 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_84 = paddle._C_ops.add(reshape_63, parameter_248) + del parameter_248, reshape_63 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_58 = [add_84, parameter_246] + del add_84, parameter_246 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_171, einsum_172, einsum_173 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_58, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_58 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_228, + split_229, + ) = einsum_172 + del einsum_172 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_230, + split_231, + ) = einsum_173 + del einsum_173 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_59 = [cast_5, einsum_171] + del einsum_171 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_174, einsum_175, einsum_176 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_59, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_59 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_232, + split_233, + ) = einsum_175 + del einsum_175 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_234, + split_235, + ) = einsum_176 + del einsum_176 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_85 = paddle._C_ops.add(einsum_165, index_select_9) + del einsum_165, index_select_9 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_86 = paddle._C_ops.add(add_85, einsum_174) + del add_85, einsum_174 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_13 = paddle._C_ops.scale(add_86, full_16, float("0"), True) + del add_86 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_9 = paddle._C_ops.subtract(scale_13, scale_4) + del scale_13 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_9 = paddle._C_ops.softmax(subtract_9, 3) + del subtract_9 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_60 = [dropout_76, reshape_65] + del dropout_76, reshape_65 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_177, einsum_178, einsum_179 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_60, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_60 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_236, + split_237, + ) = einsum_178 + del einsum_178 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_238, + split_239, + ) = einsum_179 + del einsum_179 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(einsum_177, full_int_array_10) + del einsum_177 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_61 = [reshape_69, parameter_251] + del parameter_251, reshape_69 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_180, einsum_181, einsum_182 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_61, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_61 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_240, + split_241, + ) = einsum_181 + del einsum_181 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_242, + split_243, + ) = einsum_182 + del einsum_182 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_180, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_180 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_87 = paddle._C_ops.add(dropout_78, layer_norm_51) + del dropout_78, layer_norm_51 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_245, parameter_244, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_87, parameter_244, parameter_245 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_54, parameter_241, False, False) + del parameter_241 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_88 = paddle._C_ops.add(matmul_58, parameter_240) + del matmul_58, parameter_240 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_9 = paddle._C_ops.gelu(add_88, False) + del add_88 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_9 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_59 = paddle._C_ops.matmul(dropout_80, parameter_239, False, False) + del dropout_80, parameter_239 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_89 = paddle._C_ops.add(matmul_59, parameter_238) + del matmul_59, parameter_238 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_89, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_89 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_90 = paddle._C_ops.add(dropout_82, layer_norm_54) + del dropout_82, layer_norm_54 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_243, parameter_242, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_90, parameter_242, parameter_243 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_57, parameter_237, False, False) + del parameter_237 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(matmul_60, full_int_array_5) + del matmul_60 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_61 = paddle._C_ops.matmul(layer_norm_57, parameter_236, False, False) + del parameter_236 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(matmul_61, full_int_array_5) + del matmul_61 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_57, parameter_235, False, False) + del parameter_235 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_72 = paddle._C_ops.reshape(matmul_62, full_int_array_5) + del matmul_62 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_63 = paddle._C_ops.matmul(dropout_2, parameter_233, False, False) + del parameter_233 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(matmul_63, full_int_array_6) + del matmul_63 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_91 = paddle._C_ops.add(reshape_70, parameter_230) + del parameter_230 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_62 = [add_91, reshape_71] + del add_91, reshape_71 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_183, einsum_184, einsum_185 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_62, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_62 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_244, + split_245, + ) = einsum_184 + del einsum_184 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_246, + split_247, + ) = einsum_185 + del einsum_185 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_92 = paddle._C_ops.add(reshape_70, parameter_232) + del parameter_232 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_63 = [add_92, reshape_73] + del add_92, reshape_73 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_186, einsum_187, einsum_188 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_63, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_63 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_248, + split_249, + ) = einsum_187 + del einsum_187 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_250, + split_251, + ) = einsum_188 + del einsum_188 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(einsum_186, full_int_array_7) + del einsum_186 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + reshape_74, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_74 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(slice_10, full_int_array_9) + del slice_10 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_10 = paddle._C_ops.index_select(reshape_75, arange_2, 3) + del reshape_75 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_93 = paddle._C_ops.add(reshape_70, parameter_231) + del parameter_231, reshape_70 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_64 = [add_93, parameter_229] + del add_93, parameter_229 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_189, einsum_190, einsum_191 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_64, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_64 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_252, + split_253, + ) = einsum_190 + del einsum_190 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_254, + split_255, + ) = einsum_191 + del einsum_191 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_65 = [cast_5, einsum_189] + del einsum_189 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_192, einsum_193, einsum_194 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_65, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_65 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_256, + split_257, + ) = einsum_193 + del einsum_193 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_258, + split_259, + ) = einsum_194 + del einsum_194 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_94 = paddle._C_ops.add(einsum_183, index_select_10) + del einsum_183, index_select_10 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_95 = paddle._C_ops.add(add_94, einsum_192) + del add_94, einsum_192 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_14 = paddle._C_ops.scale(add_95, full_16, float("0"), True) + del add_95 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_10 = paddle._C_ops.subtract(scale_14, scale_4) + del scale_14 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_10 = paddle._C_ops.softmax(subtract_10, 3) + del subtract_10 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_66 = [dropout_84, reshape_72] + del dropout_84, reshape_72 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_195, einsum_196, einsum_197 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_66, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_66 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_260, + split_261, + ) = einsum_196 + del einsum_196 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_262, + split_263, + ) = einsum_197 + del einsum_197 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(einsum_195, full_int_array_10) + del einsum_195 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_67 = [reshape_76, parameter_234] + del parameter_234, reshape_76 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_198, einsum_199, einsum_200 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_67, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_67 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_264, + split_265, + ) = einsum_199 + del einsum_199 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_266, + split_267, + ) = einsum_200 + del einsum_200 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_198, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_198 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_96 = paddle._C_ops.add(dropout_86, layer_norm_57) + del dropout_86, layer_norm_57 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_96, parameter_228, parameter_227, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_96, parameter_227, parameter_228 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_60, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_97 = paddle._C_ops.add(matmul_64, parameter_223) + del matmul_64, parameter_223 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_10 = paddle._C_ops.gelu(add_97, False) + del add_97 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_10 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_65 = paddle._C_ops.matmul(dropout_88, parameter_222, False, False) + del dropout_88, parameter_222 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_98 = paddle._C_ops.add(matmul_65, parameter_221) + del matmul_65, parameter_221 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_98, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_98 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_99 = paddle._C_ops.add(dropout_90, layer_norm_60) + del dropout_90, layer_norm_60 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_99, parameter_226, parameter_225, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_99, parameter_225, parameter_226 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_63, parameter_220, False, False) + del parameter_220 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(matmul_66, full_int_array_5) + del matmul_66 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_63, parameter_219, False, False) + del parameter_219 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(matmul_67, full_int_array_5) + del matmul_67 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_68 = paddle._C_ops.matmul(layer_norm_63, parameter_218, False, False) + del parameter_218 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(matmul_68, full_int_array_5) + del matmul_68 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_69 = paddle._C_ops.matmul(dropout_2, parameter_216, False, False) + del parameter_216 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_80 = paddle._C_ops.reshape(matmul_69, full_int_array_6) + del matmul_69 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_100 = paddle._C_ops.add(reshape_77, parameter_213) + del parameter_213 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_68 = [add_100, reshape_78] + del add_100, reshape_78 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_201, einsum_202, einsum_203 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_68, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_68 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_268, + split_269, + ) = einsum_202 + del einsum_202 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_270, + split_271, + ) = einsum_203 + del einsum_203 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_101 = paddle._C_ops.add(reshape_77, parameter_215) + del parameter_215 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_69 = [add_101, reshape_80] + del add_101, reshape_80 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_204, einsum_205, einsum_206 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_69, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_69 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_272, + split_273, + ) = einsum_205 + del einsum_205 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_274, + split_275, + ) = einsum_206 + del einsum_206 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(einsum_204, full_int_array_7) + del einsum_204 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + reshape_81, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_81 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(slice_11, full_int_array_9) + del slice_11 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_11 = paddle._C_ops.index_select(reshape_82, arange_2, 3) + del reshape_82 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_102 = paddle._C_ops.add(reshape_77, parameter_214) + del parameter_214, reshape_77 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_70 = [add_102, parameter_212] + del add_102, parameter_212 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_207, einsum_208, einsum_209 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_70, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_70 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_276, + split_277, + ) = einsum_208 + del einsum_208 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_278, + split_279, + ) = einsum_209 + del einsum_209 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_71 = [cast_5, einsum_207] + del einsum_207 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_210, einsum_211, einsum_212 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_71, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_71 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_280, + split_281, + ) = einsum_211 + del einsum_211 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_282, + split_283, + ) = einsum_212 + del einsum_212 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_103 = paddle._C_ops.add(einsum_201, index_select_11) + del einsum_201, index_select_11 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_104 = paddle._C_ops.add(add_103, einsum_210) + del add_103, einsum_210 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_15 = paddle._C_ops.scale(add_104, full_16, float("0"), True) + del add_104 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_11 = paddle._C_ops.subtract(scale_15, scale_4) + del scale_15 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_11 = paddle._C_ops.softmax(subtract_11, 3) + del subtract_11 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_72 = [dropout_92, reshape_79] + del dropout_92, reshape_79 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_213, einsum_214, einsum_215 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_72, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_72 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_284, + split_285, + ) = einsum_214 + del einsum_214 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_286, + split_287, + ) = einsum_215 + del einsum_215 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(einsum_213, full_int_array_10) + del einsum_213 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_73 = [reshape_83, parameter_217] + del parameter_217, reshape_83 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_216, einsum_217, einsum_218 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_73, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_73 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_288, + split_289, + ) = einsum_217 + del einsum_217 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_290, + split_291, + ) = einsum_218 + del einsum_218 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_216, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_216 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_105 = paddle._C_ops.add(dropout_94, layer_norm_63) + del dropout_94, layer_norm_63 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_105, parameter_211, parameter_210, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_105, parameter_210, parameter_211 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_66, parameter_207, False, False) + del parameter_207 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_106 = paddle._C_ops.add(matmul_70, parameter_206) + del matmul_70, parameter_206 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_11 = paddle._C_ops.gelu(add_106, False) + del add_106 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_11 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_71 = paddle._C_ops.matmul(dropout_96, parameter_205, False, False) + del dropout_96, parameter_205 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_107 = paddle._C_ops.add(matmul_71, parameter_204) + del matmul_71, parameter_204 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_107, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_107 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_108 = paddle._C_ops.add(dropout_98, layer_norm_66) + del dropout_98, layer_norm_66 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_108, parameter_209, parameter_208, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_108, parameter_208, parameter_209 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_69, parameter_203, False, False) + del parameter_203 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_84 = paddle._C_ops.reshape(matmul_72, full_int_array_5) + del matmul_72 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_69, parameter_202, False, False) + del parameter_202 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_85 = paddle._C_ops.reshape(matmul_73, full_int_array_5) + del matmul_73 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_69, parameter_201, False, False) + del parameter_201 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_86 = paddle._C_ops.reshape(matmul_74, full_int_array_5) + del matmul_74 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_75 = paddle._C_ops.matmul(dropout_2, parameter_199, False, False) + del parameter_199 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(matmul_75, full_int_array_6) + del matmul_75 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_109 = paddle._C_ops.add(reshape_84, parameter_196) + del parameter_196 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_74 = [add_109, reshape_85] + del add_109, reshape_85 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_219, einsum_220, einsum_221 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_74, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_74 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_292, + split_293, + ) = einsum_220 + del einsum_220 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_294, + split_295, + ) = einsum_221 + del einsum_221 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_110 = paddle._C_ops.add(reshape_84, parameter_198) + del parameter_198 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_75 = [add_110, reshape_87] + del add_110, reshape_87 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_222, einsum_223, einsum_224 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_75, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_75 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_296, + split_297, + ) = einsum_223 + del einsum_223 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_298, + split_299, + ) = einsum_224 + del einsum_224 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(einsum_222, full_int_array_7) + del einsum_222 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + reshape_88, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_88 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_89 = paddle._C_ops.reshape(slice_12, full_int_array_9) + del slice_12 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_12 = paddle._C_ops.index_select(reshape_89, arange_2, 3) + del reshape_89 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_111 = paddle._C_ops.add(reshape_84, parameter_197) + del parameter_197, reshape_84 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_76 = [add_111, parameter_195] + del add_111, parameter_195 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_225, einsum_226, einsum_227 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_76, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_76 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_300, + split_301, + ) = einsum_226 + del einsum_226 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_302, + split_303, + ) = einsum_227 + del einsum_227 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_77 = [cast_5, einsum_225] + del einsum_225 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_228, einsum_229, einsum_230 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_77, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_77 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_304, + split_305, + ) = einsum_229 + del einsum_229 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_306, + split_307, + ) = einsum_230 + del einsum_230 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_112 = paddle._C_ops.add(einsum_219, index_select_12) + del einsum_219, index_select_12 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_113 = paddle._C_ops.add(add_112, einsum_228) + del add_112, einsum_228 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_16 = paddle._C_ops.scale(add_113, full_16, float("0"), True) + del add_113 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_12 = paddle._C_ops.subtract(scale_16, scale_4) + del scale_16 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_12 = paddle._C_ops.softmax(subtract_12, 3) + del subtract_12 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_12, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_12 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_78 = [dropout_100, reshape_86] + del dropout_100, reshape_86 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_231, einsum_232, einsum_233 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_78, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_78 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_308, + split_309, + ) = einsum_232 + del einsum_232 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_310, + split_311, + ) = einsum_233 + del einsum_233 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_90 = paddle._C_ops.reshape(einsum_231, full_int_array_10) + del einsum_231 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_79 = [reshape_90, parameter_200] + del parameter_200, reshape_90 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_234, einsum_235, einsum_236 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_79, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_79 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_312, + split_313, + ) = einsum_235 + del einsum_235 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_314, + split_315, + ) = einsum_236 + del einsum_236 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_102, dropout_103 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_234, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_234 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_114 = paddle._C_ops.add(dropout_102, layer_norm_69) + del dropout_102, layer_norm_69 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_114, parameter_194, parameter_193, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_114, parameter_193, parameter_194 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_76 = paddle._C_ops.matmul(layer_norm_72, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_115 = paddle._C_ops.add(matmul_76, parameter_189) + del matmul_76, parameter_189 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_12 = paddle._C_ops.gelu(add_115, False) + del add_115 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_104, dropout_105 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_12, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_12 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_77 = paddle._C_ops.matmul(dropout_104, parameter_188, False, False) + del dropout_104, parameter_188 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_116 = paddle._C_ops.add(matmul_77, parameter_187) + del matmul_77, parameter_187 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_106, dropout_107 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_116, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_116 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_117 = paddle._C_ops.add(dropout_106, layer_norm_72) + del dropout_106, layer_norm_72 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_117, parameter_192, parameter_191, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_117, parameter_191, parameter_192 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_75, parameter_186, False, False) + del parameter_186 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(matmul_78, full_int_array_5) + del matmul_78 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_75, parameter_185, False, False) + del parameter_185 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(matmul_79, full_int_array_5) + del matmul_79 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_75, parameter_184, False, False) + del parameter_184 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_93 = paddle._C_ops.reshape(matmul_80, full_int_array_5) + del matmul_80 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_81 = paddle._C_ops.matmul(dropout_2, parameter_182, False, False) + del parameter_182 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(matmul_81, full_int_array_6) + del matmul_81 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_118 = paddle._C_ops.add(reshape_91, parameter_179) + del parameter_179 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_80 = [add_118, reshape_92] + del add_118, reshape_92 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_237, einsum_238, einsum_239 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_80, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_80 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_316, + split_317, + ) = einsum_238 + del einsum_238 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_318, + split_319, + ) = einsum_239 + del einsum_239 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_119 = paddle._C_ops.add(reshape_91, parameter_181) + del parameter_181 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_81 = [add_119, reshape_94] + del add_119, reshape_94 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_240, einsum_241, einsum_242 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_81, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_81 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_320, + split_321, + ) = einsum_241 + del einsum_241 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_322, + split_323, + ) = einsum_242 + del einsum_242 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(einsum_240, full_int_array_7) + del einsum_240 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + reshape_95, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_95 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(slice_13, full_int_array_9) + del slice_13 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_13 = paddle._C_ops.index_select(reshape_96, arange_2, 3) + del reshape_96 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_120 = paddle._C_ops.add(reshape_91, parameter_180) + del parameter_180, reshape_91 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_82 = [add_120, parameter_178] + del add_120, parameter_178 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_243, einsum_244, einsum_245 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_82, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_82 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_324, + split_325, + ) = einsum_244 + del einsum_244 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_326, + split_327, + ) = einsum_245 + del einsum_245 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_83 = [cast_5, einsum_243] + del einsum_243 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_246, einsum_247, einsum_248 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_83, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_83 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_328, + split_329, + ) = einsum_247 + del einsum_247 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_330, + split_331, + ) = einsum_248 + del einsum_248 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_121 = paddle._C_ops.add(einsum_237, index_select_13) + del einsum_237, index_select_13 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_122 = paddle._C_ops.add(add_121, einsum_246) + del add_121, einsum_246 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_17 = paddle._C_ops.scale(add_122, full_16, float("0"), True) + del add_122 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_13 = paddle._C_ops.subtract(scale_17, scale_4) + del scale_17 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_13 = paddle._C_ops.softmax(subtract_13, 3) + del subtract_13 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_108, dropout_109 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_13 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_84 = [dropout_108, reshape_93] + del dropout_108, reshape_93 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_249, einsum_250, einsum_251 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_84, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_84 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_332, + split_333, + ) = einsum_250 + del einsum_250 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_334, + split_335, + ) = einsum_251 + del einsum_251 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(einsum_249, full_int_array_10) + del einsum_249 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_85 = [reshape_97, parameter_183] + del parameter_183, reshape_97 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_252, einsum_253, einsum_254 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_85, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_85 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_336, + split_337, + ) = einsum_253 + del einsum_253 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_338, + split_339, + ) = einsum_254 + del einsum_254 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_110, dropout_111 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_252, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_252 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_123 = paddle._C_ops.add(dropout_110, layer_norm_75) + del dropout_110, layer_norm_75 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_177, parameter_176, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_123, parameter_176, parameter_177 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_78, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_124 = paddle._C_ops.add(matmul_82, parameter_172) + del matmul_82, parameter_172 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_13 = paddle._C_ops.gelu(add_124, False) + del add_124 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_112, dropout_113 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_13 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_83 = paddle._C_ops.matmul(dropout_112, parameter_171, False, False) + del dropout_112, parameter_171 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_125 = paddle._C_ops.add(matmul_83, parameter_170) + del matmul_83, parameter_170 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_114, dropout_115 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_125, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_125 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_126 = paddle._C_ops.add(dropout_114, layer_norm_78) + del dropout_114, layer_norm_78 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_175, parameter_174, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_126, parameter_174, parameter_175 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_81, parameter_169, False, False) + del parameter_169 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(matmul_84, full_int_array_5) + del matmul_84 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_81, parameter_168, False, False) + del parameter_168 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(matmul_85, full_int_array_5) + del matmul_85 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_86 = paddle._C_ops.matmul(layer_norm_81, parameter_167, False, False) + del parameter_167 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_100 = paddle._C_ops.reshape(matmul_86, full_int_array_5) + del matmul_86 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_87 = paddle._C_ops.matmul(dropout_2, parameter_165, False, False) + del parameter_165 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_101 = paddle._C_ops.reshape(matmul_87, full_int_array_6) + del matmul_87 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_127 = paddle._C_ops.add(reshape_98, parameter_162) + del parameter_162 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_86 = [add_127, reshape_99] + del add_127, reshape_99 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_255, einsum_256, einsum_257 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_86, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_86 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_340, + split_341, + ) = einsum_256 + del einsum_256 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_342, + split_343, + ) = einsum_257 + del einsum_257 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_128 = paddle._C_ops.add(reshape_98, parameter_164) + del parameter_164 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_87 = [add_128, reshape_101] + del add_128, reshape_101 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_258, einsum_259, einsum_260 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_87, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_87 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_344, + split_345, + ) = einsum_259 + del einsum_259 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_346, + split_347, + ) = einsum_260 + del einsum_260 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(einsum_258, full_int_array_7) + del einsum_258 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + reshape_102, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_102 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_103 = paddle._C_ops.reshape(slice_14, full_int_array_9) + del slice_14 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_14 = paddle._C_ops.index_select(reshape_103, arange_2, 3) + del reshape_103 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_129 = paddle._C_ops.add(reshape_98, parameter_163) + del parameter_163, reshape_98 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_88 = [add_129, parameter_161] + del add_129, parameter_161 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_261, einsum_262, einsum_263 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_88, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_88 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_348, + split_349, + ) = einsum_262 + del einsum_262 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_350, + split_351, + ) = einsum_263 + del einsum_263 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_89 = [cast_5, einsum_261] + del einsum_261 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_264, einsum_265, einsum_266 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_89, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_89 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_352, + split_353, + ) = einsum_265 + del einsum_265 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_354, + split_355, + ) = einsum_266 + del einsum_266 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_130 = paddle._C_ops.add(einsum_255, index_select_14) + del einsum_255, index_select_14 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_131 = paddle._C_ops.add(add_130, einsum_264) + del add_130, einsum_264 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_18 = paddle._C_ops.scale(add_131, full_16, float("0"), True) + del add_131 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_14 = paddle._C_ops.subtract(scale_18, scale_4) + del scale_18 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_14 = paddle._C_ops.softmax(subtract_14, 3) + del subtract_14 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_116, dropout_117 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_14, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_14 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_90 = [dropout_116, reshape_100] + del dropout_116, reshape_100 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_267, einsum_268, einsum_269 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_90, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_90 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_356, + split_357, + ) = einsum_268 + del einsum_268 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_358, + split_359, + ) = einsum_269 + del einsum_269 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_104 = paddle._C_ops.reshape(einsum_267, full_int_array_10) + del einsum_267 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_91 = [reshape_104, parameter_166] + del parameter_166, reshape_104 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_270, einsum_271, einsum_272 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_91, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_91 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_360, + split_361, + ) = einsum_271 + del einsum_271 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_362, + split_363, + ) = einsum_272 + del einsum_272 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_118, dropout_119 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_270, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_270 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_132 = paddle._C_ops.add(dropout_118, layer_norm_81) + del dropout_118, layer_norm_81 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_160, parameter_159, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_132, parameter_159, parameter_160 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_88 = paddle._C_ops.matmul(layer_norm_84, parameter_156, False, False) + del parameter_156 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_133 = paddle._C_ops.add(matmul_88, parameter_155) + del matmul_88, parameter_155 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_14 = paddle._C_ops.gelu(add_133, False) + del add_133 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_120, dropout_121 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_14, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_14 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_89 = paddle._C_ops.matmul(dropout_120, parameter_154, False, False) + del dropout_120, parameter_154 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_134 = paddle._C_ops.add(matmul_89, parameter_153) + del matmul_89, parameter_153 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_122, dropout_123 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_134, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_134 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_135 = paddle._C_ops.add(dropout_122, layer_norm_84) + del dropout_122, layer_norm_84 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_135, parameter_158, parameter_157, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_135, parameter_157, parameter_158 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_87, parameter_152, False, False) + del parameter_152 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(matmul_90, full_int_array_5) + del matmul_90 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_87, parameter_151, False, False) + del parameter_151 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(matmul_91, full_int_array_5) + del matmul_91 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_92 = paddle._C_ops.matmul(layer_norm_87, parameter_150, False, False) + del parameter_150 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(matmul_92, full_int_array_5) + del matmul_92 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_93 = paddle._C_ops.matmul(dropout_2, parameter_148, False, False) + del parameter_148 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(matmul_93, full_int_array_6) + del matmul_93 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_136 = paddle._C_ops.add(reshape_105, parameter_145) + del parameter_145 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_92 = [add_136, reshape_106] + del add_136, reshape_106 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_273, einsum_274, einsum_275 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_92, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_92 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_364, + split_365, + ) = einsum_274 + del einsum_274 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_366, + split_367, + ) = einsum_275 + del einsum_275 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_137 = paddle._C_ops.add(reshape_105, parameter_147) + del parameter_147 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_93 = [add_137, reshape_108] + del add_137, reshape_108 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_276, einsum_277, einsum_278 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_93, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_93 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_368, + split_369, + ) = einsum_277 + del einsum_277 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_370, + split_371, + ) = einsum_278 + del einsum_278 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_109 = paddle._C_ops.reshape(einsum_276, full_int_array_7) + del einsum_276 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + reshape_109, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_109 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_110 = paddle._C_ops.reshape(slice_15, full_int_array_9) + del slice_15 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_15 = paddle._C_ops.index_select(reshape_110, arange_2, 3) + del reshape_110 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_138 = paddle._C_ops.add(reshape_105, parameter_146) + del parameter_146, reshape_105 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_94 = [add_138, parameter_144] + del add_138, parameter_144 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_279, einsum_280, einsum_281 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_94, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_94 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_372, + split_373, + ) = einsum_280 + del einsum_280 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_374, + split_375, + ) = einsum_281 + del einsum_281 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_95 = [cast_5, einsum_279] + del einsum_279 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_282, einsum_283, einsum_284 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_95, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_95 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_376, + split_377, + ) = einsum_283 + del einsum_283 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_378, + split_379, + ) = einsum_284 + del einsum_284 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_139 = paddle._C_ops.add(einsum_273, index_select_15) + del einsum_273, index_select_15 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_140 = paddle._C_ops.add(add_139, einsum_282) + del add_139, einsum_282 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_19 = paddle._C_ops.scale(add_140, full_16, float("0"), True) + del add_140 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_15 = paddle._C_ops.subtract(scale_19, scale_4) + del scale_19 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_15 = paddle._C_ops.softmax(subtract_15, 3) + del subtract_15 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_124, dropout_125 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_15 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_96 = [dropout_124, reshape_107] + del dropout_124, reshape_107 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_285, einsum_286, einsum_287 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_96, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_96 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_380, + split_381, + ) = einsum_286 + del einsum_286 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_382, + split_383, + ) = einsum_287 + del einsum_287 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(einsum_285, full_int_array_10) + del einsum_285 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_97 = [reshape_111, parameter_149] + del parameter_149, reshape_111 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_288, einsum_289, einsum_290 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_97, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_97 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_384, + split_385, + ) = einsum_289 + del einsum_289 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_386, + split_387, + ) = einsum_290 + del einsum_290 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_126, dropout_127 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_288, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_288 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_141 = paddle._C_ops.add(dropout_126, layer_norm_87) + del dropout_126, layer_norm_87 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_141, parameter_143, parameter_142, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_141, parameter_142, parameter_143 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_94 = paddle._C_ops.matmul(layer_norm_90, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_142 = paddle._C_ops.add(matmul_94, parameter_138) + del matmul_94, parameter_138 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_15 = paddle._C_ops.gelu(add_142, False) + del add_142 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_128, dropout_129 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_15 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_95 = paddle._C_ops.matmul(dropout_128, parameter_137, False, False) + del dropout_128, parameter_137 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_143 = paddle._C_ops.add(matmul_95, parameter_136) + del matmul_95, parameter_136 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_130, dropout_131 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_143, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_143 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_144 = paddle._C_ops.add(dropout_130, layer_norm_90) + del dropout_130, layer_norm_90 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_144, parameter_141, parameter_140, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_144, parameter_140, parameter_141 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_93, parameter_135, False, False) + del parameter_135 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(matmul_96, full_int_array_5) + del matmul_96 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_97 = paddle._C_ops.matmul(layer_norm_93, parameter_134, False, False) + del parameter_134 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(matmul_97, full_int_array_5) + del matmul_97 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_98 = paddle._C_ops.matmul(layer_norm_93, parameter_133, False, False) + del parameter_133 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_114 = paddle._C_ops.reshape(matmul_98, full_int_array_5) + del matmul_98 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_99 = paddle._C_ops.matmul(dropout_2, parameter_131, False, False) + del parameter_131 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(matmul_99, full_int_array_6) + del matmul_99 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_145 = paddle._C_ops.add(reshape_112, parameter_128) + del parameter_128 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_98 = [add_145, reshape_113] + del add_145, reshape_113 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_291, einsum_292, einsum_293 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_98, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_98 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_388, + split_389, + ) = einsum_292 + del einsum_292 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_390, + split_391, + ) = einsum_293 + del einsum_293 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_146 = paddle._C_ops.add(reshape_112, parameter_130) + del parameter_130 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_99 = [add_146, reshape_115] + del add_146, reshape_115 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_294, einsum_295, einsum_296 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_99, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_99 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_392, + split_393, + ) = einsum_295 + del einsum_295 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_394, + split_395, + ) = einsum_296 + del einsum_296 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(einsum_294, full_int_array_7) + del einsum_294 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + reshape_116, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_116 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(slice_16, full_int_array_9) + del slice_16 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_16 = paddle._C_ops.index_select(reshape_117, arange_2, 3) + del reshape_117 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_147 = paddle._C_ops.add(reshape_112, parameter_129) + del parameter_129, reshape_112 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_100 = [add_147, parameter_127] + del add_147, parameter_127 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_297, einsum_298, einsum_299 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_100, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_100 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_396, + split_397, + ) = einsum_298 + del einsum_298 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_398, + split_399, + ) = einsum_299 + del einsum_299 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_101 = [cast_5, einsum_297] + del einsum_297 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_300, einsum_301, einsum_302 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_101, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_101 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_400, + split_401, + ) = einsum_301 + del einsum_301 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_402, + split_403, + ) = einsum_302 + del einsum_302 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_148 = paddle._C_ops.add(einsum_291, index_select_16) + del einsum_291, index_select_16 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_149 = paddle._C_ops.add(add_148, einsum_300) + del add_148, einsum_300 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_20 = paddle._C_ops.scale(add_149, full_16, float("0"), True) + del add_149 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_16 = paddle._C_ops.subtract(scale_20, scale_4) + del scale_20 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_16 = paddle._C_ops.softmax(subtract_16, 3) + del subtract_16 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_132, dropout_133 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_16, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_16 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_102 = [dropout_132, reshape_114] + del dropout_132, reshape_114 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_303, einsum_304, einsum_305 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_102, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_102 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_404, + split_405, + ) = einsum_304 + del einsum_304 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_406, + split_407, + ) = einsum_305 + del einsum_305 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(einsum_303, full_int_array_10) + del einsum_303 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_103 = [reshape_118, parameter_132] + del parameter_132, reshape_118 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_306, einsum_307, einsum_308 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_103, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_103 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_408, + split_409, + ) = einsum_307 + del einsum_307 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_410, + split_411, + ) = einsum_308 + del einsum_308 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_134, dropout_135 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_306, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_306 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_150 = paddle._C_ops.add(dropout_134, layer_norm_93) + del dropout_134, layer_norm_93 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_150, parameter_126, parameter_125, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_150, parameter_125, parameter_126 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_96, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_151 = paddle._C_ops.add(matmul_100, parameter_121) + del matmul_100, parameter_121 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_16 = paddle._C_ops.gelu(add_151, False) + del add_151 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_136, dropout_137 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_16, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_16 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_101 = paddle._C_ops.matmul(dropout_136, parameter_120, False, False) + del dropout_136, parameter_120 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_152 = paddle._C_ops.add(matmul_101, parameter_119) + del matmul_101, parameter_119 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_138, dropout_139 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_152, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_152 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_153 = paddle._C_ops.add(dropout_138, layer_norm_96) + del dropout_138, layer_norm_96 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_153, parameter_124, parameter_123, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_153, parameter_123, parameter_124 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_99, parameter_118, False, False) + del parameter_118 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(matmul_102, full_int_array_5) + del matmul_102 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_103 = paddle._C_ops.matmul(layer_norm_99, parameter_117, False, False) + del parameter_117 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_120 = paddle._C_ops.reshape(matmul_103, full_int_array_5) + del matmul_103 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_104 = paddle._C_ops.matmul(layer_norm_99, parameter_116, False, False) + del parameter_116 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(matmul_104, full_int_array_5) + del matmul_104 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_105 = paddle._C_ops.matmul(dropout_2, parameter_114, False, False) + del parameter_114 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(matmul_105, full_int_array_6) + del matmul_105 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_154 = paddle._C_ops.add(reshape_119, parameter_111) + del parameter_111 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_104 = [add_154, reshape_120] + del add_154, reshape_120 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_309, einsum_310, einsum_311 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_104, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_104 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_412, + split_413, + ) = einsum_310 + del einsum_310 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_414, + split_415, + ) = einsum_311 + del einsum_311 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_155 = paddle._C_ops.add(reshape_119, parameter_113) + del parameter_113 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_105 = [add_155, reshape_122] + del add_155, reshape_122 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_312, einsum_313, einsum_314 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_105, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_105 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_416, + split_417, + ) = einsum_313 + del einsum_313 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_418, + split_419, + ) = einsum_314 + del einsum_314 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(einsum_312, full_int_array_7) + del einsum_312 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + reshape_123, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_123 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(slice_17, full_int_array_9) + del slice_17 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_17 = paddle._C_ops.index_select(reshape_124, arange_2, 3) + del reshape_124 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_156 = paddle._C_ops.add(reshape_119, parameter_112) + del parameter_112, reshape_119 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_106 = [add_156, parameter_110] + del add_156, parameter_110 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_315, einsum_316, einsum_317 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_106, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_106 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_420, + split_421, + ) = einsum_316 + del einsum_316 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_422, + split_423, + ) = einsum_317 + del einsum_317 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_107 = [cast_5, einsum_315] + del einsum_315 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_318, einsum_319, einsum_320 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_107, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_107 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_424, + split_425, + ) = einsum_319 + del einsum_319 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_426, + split_427, + ) = einsum_320 + del einsum_320 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_157 = paddle._C_ops.add(einsum_309, index_select_17) + del einsum_309, index_select_17 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_158 = paddle._C_ops.add(add_157, einsum_318) + del add_157, einsum_318 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_21 = paddle._C_ops.scale(add_158, full_16, float("0"), True) + del add_158 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_17 = paddle._C_ops.subtract(scale_21, scale_4) + del scale_21 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_17 = paddle._C_ops.softmax(subtract_17, 3) + del subtract_17 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_140, dropout_141 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_17 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_108 = [dropout_140, reshape_121] + del dropout_140, reshape_121 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_321, einsum_322, einsum_323 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_108, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_108 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_428, + split_429, + ) = einsum_322 + del einsum_322 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_430, + split_431, + ) = einsum_323 + del einsum_323 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_125 = paddle._C_ops.reshape(einsum_321, full_int_array_10) + del einsum_321 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_109 = [reshape_125, parameter_115] + del parameter_115, reshape_125 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_324, einsum_325, einsum_326 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_109, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_109 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_432, + split_433, + ) = einsum_325 + del einsum_325 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_434, + split_435, + ) = einsum_326 + del einsum_326 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_142, dropout_143 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_324, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_324 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_159 = paddle._C_ops.add(dropout_142, layer_norm_99) + del dropout_142, layer_norm_99 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_159, parameter_109, parameter_108, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_159, parameter_108, parameter_109 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_106 = paddle._C_ops.matmul(layer_norm_102, parameter_105, False, False) + del parameter_105 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_160 = paddle._C_ops.add(matmul_106, parameter_104) + del matmul_106, parameter_104 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_17 = paddle._C_ops.gelu(add_160, False) + del add_160 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_144, dropout_145 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_17 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_107 = paddle._C_ops.matmul(dropout_144, parameter_103, False, False) + del dropout_144, parameter_103 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_161 = paddle._C_ops.add(matmul_107, parameter_102) + del matmul_107, parameter_102 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_146, dropout_147 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_161, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_161 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_162 = paddle._C_ops.add(dropout_146, layer_norm_102) + del dropout_146, layer_norm_102 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_162, parameter_107, parameter_106, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_162, parameter_106, parameter_107 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_108 = paddle._C_ops.matmul(layer_norm_105, parameter_101, False, False) + del parameter_101 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_126 = paddle._C_ops.reshape(matmul_108, full_int_array_5) + del matmul_108 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_109 = paddle._C_ops.matmul(layer_norm_105, parameter_100, False, False) + del parameter_100 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_127 = paddle._C_ops.reshape(matmul_109, full_int_array_5) + del matmul_109 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_105, parameter_99, False, False) + del parameter_99 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_128 = paddle._C_ops.reshape(matmul_110, full_int_array_5) + del matmul_110 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_111 = paddle._C_ops.matmul(dropout_2, parameter_97, False, False) + del parameter_97 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(matmul_111, full_int_array_6) + del matmul_111 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_163 = paddle._C_ops.add(reshape_126, parameter_94) + del parameter_94 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_110 = [add_163, reshape_127] + del add_163, reshape_127 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_327, einsum_328, einsum_329 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_110, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_110 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_436, + split_437, + ) = einsum_328 + del einsum_328 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_438, + split_439, + ) = einsum_329 + del einsum_329 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_164 = paddle._C_ops.add(reshape_126, parameter_96) + del parameter_96 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_111 = [add_164, reshape_129] + del add_164, reshape_129 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_330, einsum_331, einsum_332 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_111, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_111 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_440, + split_441, + ) = einsum_331 + del einsum_331 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_442, + split_443, + ) = einsum_332 + del einsum_332 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(einsum_330, full_int_array_7) + del einsum_330 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + reshape_130, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_130 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(slice_18, full_int_array_9) + del slice_18 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_18 = paddle._C_ops.index_select(reshape_131, arange_2, 3) + del reshape_131 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_165 = paddle._C_ops.add(reshape_126, parameter_95) + del parameter_95, reshape_126 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_112 = [add_165, parameter_93] + del add_165, parameter_93 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_333, einsum_334, einsum_335 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_112, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_112 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_444, + split_445, + ) = einsum_334 + del einsum_334 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_446, + split_447, + ) = einsum_335 + del einsum_335 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_113 = [cast_5, einsum_333] + del einsum_333 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_336, einsum_337, einsum_338 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_113, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_113 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_448, + split_449, + ) = einsum_337 + del einsum_337 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_450, + split_451, + ) = einsum_338 + del einsum_338 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_166 = paddle._C_ops.add(einsum_327, index_select_18) + del einsum_327, index_select_18 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_167 = paddle._C_ops.add(add_166, einsum_336) + del add_166, einsum_336 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_22 = paddle._C_ops.scale(add_167, full_16, float("0"), True) + del add_167 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_18 = paddle._C_ops.subtract(scale_22, scale_4) + del scale_22 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_18 = paddle._C_ops.softmax(subtract_18, 3) + del subtract_18 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_148, dropout_149 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_18 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_114 = [dropout_148, reshape_128] + del dropout_148, reshape_128 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_339, einsum_340, einsum_341 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_114, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_114 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_452, + split_453, + ) = einsum_340 + del einsum_340 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_454, + split_455, + ) = einsum_341 + del einsum_341 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_132 = paddle._C_ops.reshape(einsum_339, full_int_array_10) + del einsum_339 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_115 = [reshape_132, parameter_98] + del parameter_98, reshape_132 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_342, einsum_343, einsum_344 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_115, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_115 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_456, + split_457, + ) = einsum_343 + del einsum_343 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_458, + split_459, + ) = einsum_344 + del einsum_344 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_150, dropout_151 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_342, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_342 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_168 = paddle._C_ops.add(dropout_150, layer_norm_105) + del dropout_150, layer_norm_105 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_92, parameter_91, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_168, parameter_91, parameter_92 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_108, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_169 = paddle._C_ops.add(matmul_112, parameter_87) + del matmul_112, parameter_87 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_18 = paddle._C_ops.gelu(add_169, False) + del add_169 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_152, dropout_153 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_18 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_113 = paddle._C_ops.matmul(dropout_152, parameter_86, False, False) + del dropout_152, parameter_86 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_170 = paddle._C_ops.add(matmul_113, parameter_85) + del matmul_113, parameter_85 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_154, dropout_155 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_170, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_170 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_171 = paddle._C_ops.add(dropout_154, layer_norm_108) + del dropout_154, layer_norm_108 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_171, parameter_90, parameter_89, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_171, parameter_89, parameter_90 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_111, parameter_84, False, False) + del parameter_84 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(matmul_114, full_int_array_5) + del matmul_114 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_115 = paddle._C_ops.matmul(layer_norm_111, parameter_83, False, False) + del parameter_83 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_134 = paddle._C_ops.reshape(matmul_115, full_int_array_5) + del matmul_115 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_111, parameter_82, False, False) + del parameter_82 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(matmul_116, full_int_array_5) + del matmul_116 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_117 = paddle._C_ops.matmul(dropout_2, parameter_80, False, False) + del parameter_80 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_136 = paddle._C_ops.reshape(matmul_117, full_int_array_6) + del matmul_117 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_172 = paddle._C_ops.add(reshape_133, parameter_77) + del parameter_77 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_116 = [add_172, reshape_134] + del add_172, reshape_134 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_345, einsum_346, einsum_347 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_116, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_116 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_460, + split_461, + ) = einsum_346 + del einsum_346 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_462, + split_463, + ) = einsum_347 + del einsum_347 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_173 = paddle._C_ops.add(reshape_133, parameter_79) + del parameter_79 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_117 = [add_173, reshape_136] + del add_173, reshape_136 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_348, einsum_349, einsum_350 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_117, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_117 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_464, + split_465, + ) = einsum_349 + del einsum_349 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_466, + split_467, + ) = einsum_350 + del einsum_350 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(einsum_348, full_int_array_7) + del einsum_348 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + reshape_137, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_137 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(slice_19, full_int_array_9) + del slice_19 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_19 = paddle._C_ops.index_select(reshape_138, arange_2, 3) + del reshape_138 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_174 = paddle._C_ops.add(reshape_133, parameter_78) + del parameter_78, reshape_133 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_118 = [add_174, parameter_76] + del add_174, parameter_76 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_351, einsum_352, einsum_353 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_118, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_118 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_468, + split_469, + ) = einsum_352 + del einsum_352 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_470, + split_471, + ) = einsum_353 + del einsum_353 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_119 = [cast_5, einsum_351] + del einsum_351 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_354, einsum_355, einsum_356 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_119, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_119 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_472, + split_473, + ) = einsum_355 + del einsum_355 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_474, + split_475, + ) = einsum_356 + del einsum_356 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_175 = paddle._C_ops.add(einsum_345, index_select_19) + del einsum_345, index_select_19 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_176 = paddle._C_ops.add(add_175, einsum_354) + del add_175, einsum_354 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_23 = paddle._C_ops.scale(add_176, full_16, float("0"), True) + del add_176 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_19 = paddle._C_ops.subtract(scale_23, scale_4) + del scale_23 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_19 = paddle._C_ops.softmax(subtract_19, 3) + del subtract_19 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_156, dropout_157 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_19, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_19 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_120 = [dropout_156, reshape_135] + del dropout_156, reshape_135 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_357, einsum_358, einsum_359 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_120, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_120 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_476, + split_477, + ) = einsum_358 + del einsum_358 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_478, + split_479, + ) = einsum_359 + del einsum_359 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_139 = paddle._C_ops.reshape(einsum_357, full_int_array_10) + del einsum_357 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_121 = [reshape_139, parameter_81] + del parameter_81, reshape_139 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_360, einsum_361, einsum_362 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_121, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_121 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_480, + split_481, + ) = einsum_361 + del einsum_361 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_482, + split_483, + ) = einsum_362 + del einsum_362 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_158, dropout_159 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_360, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_360 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_177 = paddle._C_ops.add(dropout_158, layer_norm_111) + del dropout_158, layer_norm_111 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_177, parameter_75, parameter_74, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_177, parameter_74, parameter_75 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_118 = paddle._C_ops.matmul(layer_norm_114, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_178 = paddle._C_ops.add(matmul_118, parameter_70) + del matmul_118, parameter_70 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_19 = paddle._C_ops.gelu(add_178, False) + del add_178 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_160, dropout_161 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_19, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_19 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_119 = paddle._C_ops.matmul(dropout_160, parameter_69, False, False) + del dropout_160, parameter_69 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_179 = paddle._C_ops.add(matmul_119, parameter_68) + del matmul_119, parameter_68 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_162, dropout_163 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_179, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_179 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_180 = paddle._C_ops.add(dropout_162, layer_norm_114) + del dropout_162, layer_norm_114 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_180, parameter_73, parameter_72, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_180, parameter_72, parameter_73 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_117, parameter_67, False, False) + del parameter_67 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(matmul_120, full_int_array_5) + del matmul_120 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_117, parameter_66, False, False) + del parameter_66 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(matmul_121, full_int_array_5) + del matmul_121 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_122 = paddle._C_ops.matmul(layer_norm_117, parameter_65, False, False) + del parameter_65 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(matmul_122, full_int_array_5) + del matmul_122 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_123 = paddle._C_ops.matmul(dropout_2, parameter_63, False, False) + del parameter_63 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_143 = paddle._C_ops.reshape(matmul_123, full_int_array_6) + del matmul_123 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_181 = paddle._C_ops.add(reshape_140, parameter_60) + del parameter_60 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_122 = [add_181, reshape_141] + del add_181, reshape_141 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_363, einsum_364, einsum_365 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_122, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_122 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_484, + split_485, + ) = einsum_364 + del einsum_364 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_486, + split_487, + ) = einsum_365 + del einsum_365 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_182 = paddle._C_ops.add(reshape_140, parameter_62) + del parameter_62 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_123 = [add_182, reshape_143] + del add_182, reshape_143 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_366, einsum_367, einsum_368 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_123, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_123 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_488, + split_489, + ) = einsum_367 + del einsum_367 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_490, + split_491, + ) = einsum_368 + del einsum_368 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_144 = paddle._C_ops.reshape(einsum_366, full_int_array_7) + del einsum_366 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + reshape_144, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_144 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_145 = paddle._C_ops.reshape(slice_20, full_int_array_9) + del slice_20 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_20 = paddle._C_ops.index_select(reshape_145, arange_2, 3) + del reshape_145 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_183 = paddle._C_ops.add(reshape_140, parameter_61) + del parameter_61, reshape_140 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_124 = [add_183, parameter_59] + del add_183, parameter_59 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_369, einsum_370, einsum_371 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_124, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_124 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_492, + split_493, + ) = einsum_370 + del einsum_370 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_494, + split_495, + ) = einsum_371 + del einsum_371 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_125 = [cast_5, einsum_369] + del einsum_369 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_372, einsum_373, einsum_374 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_125, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_125 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_496, + split_497, + ) = einsum_373 + del einsum_373 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_498, + split_499, + ) = einsum_374 + del einsum_374 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_184 = paddle._C_ops.add(einsum_363, index_select_20) + del einsum_363, index_select_20 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_185 = paddle._C_ops.add(add_184, einsum_372) + del add_184, einsum_372 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_24 = paddle._C_ops.scale(add_185, full_16, float("0"), True) + del add_185 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_20 = paddle._C_ops.subtract(scale_24, scale_4) + del scale_24 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_20 = paddle._C_ops.softmax(subtract_20, 3) + del subtract_20 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_164, dropout_165 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_20, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_20 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_126 = [dropout_164, reshape_142] + del dropout_164, reshape_142 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_375, einsum_376, einsum_377 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_126, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_126 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_500, + split_501, + ) = einsum_376 + del einsum_376 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_502, + split_503, + ) = einsum_377 + del einsum_377 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_146 = paddle._C_ops.reshape(einsum_375, full_int_array_10) + del einsum_375 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_127 = [reshape_146, parameter_64] + del parameter_64, reshape_146 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_378, einsum_379, einsum_380 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_127, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_127 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_504, + split_505, + ) = einsum_379 + del einsum_379 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_506, + split_507, + ) = einsum_380 + del einsum_380 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_166, dropout_167 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_378, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_378 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_186 = paddle._C_ops.add(dropout_166, layer_norm_117) + del dropout_166, layer_norm_117 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_186, parameter_58, parameter_57, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_186, parameter_57, parameter_58 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_124 = paddle._C_ops.matmul(layer_norm_120, parameter_54, False, False) + del parameter_54 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_187 = paddle._C_ops.add(matmul_124, parameter_53) + del matmul_124, parameter_53 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_20 = paddle._C_ops.gelu(add_187, False) + del add_187 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_168, dropout_169 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_20, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_20 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_125 = paddle._C_ops.matmul(dropout_168, parameter_52, False, False) + del dropout_168, parameter_52 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_188 = paddle._C_ops.add(matmul_125, parameter_51) + del matmul_125, parameter_51 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_170, dropout_171 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_188, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_188 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_189 = paddle._C_ops.add(dropout_170, layer_norm_120) + del dropout_170, layer_norm_120 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_189, parameter_56, parameter_55, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_189, parameter_55, parameter_56 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_123, parameter_50, False, False) + del parameter_50 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(matmul_126, full_int_array_5) + del matmul_126 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_127 = paddle._C_ops.matmul(layer_norm_123, parameter_49, False, False) + del parameter_49 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(matmul_127, full_int_array_5) + del matmul_127 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_128 = paddle._C_ops.matmul(layer_norm_123, parameter_48, False, False) + del parameter_48 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(matmul_128, full_int_array_5) + del matmul_128 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_129 = paddle._C_ops.matmul(dropout_2, parameter_46, False, False) + del parameter_46 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(matmul_129, full_int_array_6) + del matmul_129 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_190 = paddle._C_ops.add(reshape_147, parameter_43) + del parameter_43 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_128 = [add_190, reshape_148] + del add_190, reshape_148 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_381, einsum_382, einsum_383 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_128, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_128 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_508, + split_509, + ) = einsum_382 + del einsum_382 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_510, + split_511, + ) = einsum_383 + del einsum_383 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_191 = paddle._C_ops.add(reshape_147, parameter_45) + del parameter_45 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_129 = [add_191, reshape_150] + del add_191, reshape_150 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_384, einsum_385, einsum_386 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_129, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_129 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_512, + split_513, + ) = einsum_385 + del einsum_385 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_514, + split_515, + ) = einsum_386 + del einsum_386 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_151 = paddle._C_ops.reshape(einsum_384, full_int_array_7) + del einsum_384 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + reshape_151, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_151 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(slice_21, full_int_array_9) + del slice_21 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_21 = paddle._C_ops.index_select(reshape_152, arange_2, 3) + del reshape_152 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_192 = paddle._C_ops.add(reshape_147, parameter_44) + del parameter_44, reshape_147 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_130 = [add_192, parameter_42] + del add_192, parameter_42 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_387, einsum_388, einsum_389 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_130, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_130 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_516, + split_517, + ) = einsum_388 + del einsum_388 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_518, + split_519, + ) = einsum_389 + del einsum_389 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_131 = [cast_5, einsum_387] + del einsum_387 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_390, einsum_391, einsum_392 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_131, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_131 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_520, + split_521, + ) = einsum_391 + del einsum_391 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_522, + split_523, + ) = einsum_392 + del einsum_392 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_193 = paddle._C_ops.add(einsum_381, index_select_21) + del einsum_381, index_select_21 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_194 = paddle._C_ops.add(add_193, einsum_390) + del add_193, einsum_390 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_25 = paddle._C_ops.scale(add_194, full_16, float("0"), True) + del add_194 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_21 = paddle._C_ops.subtract(scale_25, scale_4) + del scale_25 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_21 = paddle._C_ops.softmax(subtract_21, 3) + del subtract_21 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_172, dropout_173 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_21 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_132 = [dropout_172, reshape_149] + del dropout_172, reshape_149 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_393, einsum_394, einsum_395 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_132, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_132 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_524, + split_525, + ) = einsum_394 + del einsum_394 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_526, + split_527, + ) = einsum_395 + del einsum_395 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_153 = paddle._C_ops.reshape(einsum_393, full_int_array_10) + del einsum_393 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_133 = [reshape_153, parameter_47] + del parameter_47, reshape_153 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_396, einsum_397, einsum_398 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_133, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_133 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_528, + split_529, + ) = einsum_397 + del einsum_397 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_530, + split_531, + ) = einsum_398 + del einsum_398 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_174, dropout_175 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_396, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_396 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_195 = paddle._C_ops.add(dropout_174, layer_norm_123) + del dropout_174, layer_norm_123 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_195, parameter_41, parameter_40, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_195, parameter_40, parameter_41 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_130 = paddle._C_ops.matmul(layer_norm_126, parameter_37, False, False) + del parameter_37 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_196 = paddle._C_ops.add(matmul_130, parameter_36) + del matmul_130, parameter_36 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_21 = paddle._C_ops.gelu(add_196, False) + del add_196 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_176, dropout_177 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_21 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_131 = paddle._C_ops.matmul(dropout_176, parameter_35, False, False) + del dropout_176, parameter_35 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_197 = paddle._C_ops.add(matmul_131, parameter_34) + del matmul_131, parameter_34 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_178, dropout_179 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_197, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_197 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_198 = paddle._C_ops.add(dropout_178, layer_norm_126) + del dropout_178, layer_norm_126 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_198, parameter_39, parameter_38, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_198, parameter_38, parameter_39 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_132 = paddle._C_ops.matmul(layer_norm_129, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_154 = paddle._C_ops.reshape(matmul_132, full_int_array_5) + del matmul_132 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_133 = paddle._C_ops.matmul(layer_norm_129, parameter_32, False, False) + del parameter_32 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(matmul_133, full_int_array_5) + del matmul_133 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_129, parameter_31, False, False) + del parameter_31 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(matmul_134, full_int_array_5) + del matmul_134 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_135 = paddle._C_ops.matmul(dropout_2, parameter_29, False, False) + del parameter_29 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_157 = paddle._C_ops.reshape(matmul_135, full_int_array_6) + del matmul_135 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_199 = paddle._C_ops.add(reshape_154, parameter_26) + del parameter_26 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_134 = [add_199, reshape_155] + del add_199, reshape_155 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_399, einsum_400, einsum_401 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_134, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_134 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_532, + split_533, + ) = einsum_400 + del einsum_400 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_534, + split_535, + ) = einsum_401 + del einsum_401 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_200 = paddle._C_ops.add(reshape_154, parameter_28) + del parameter_28 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_135 = [add_200, reshape_157] + del add_200, reshape_157 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_402, einsum_403, einsum_404 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_135, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_135 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_536, + split_537, + ) = einsum_403 + del einsum_403 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_538, + split_539, + ) = einsum_404 + del einsum_404 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(einsum_402, full_int_array_7) + del einsum_402 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + reshape_158, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del reshape_158 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(slice_22, full_int_array_9) + del slice_22 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_22 = paddle._C_ops.index_select(reshape_159, arange_2, 3) + del reshape_159 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_201 = paddle._C_ops.add(reshape_154, parameter_27) + del parameter_27, reshape_154 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_136 = [add_201, parameter_25] + del add_201, parameter_25 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_405, einsum_406, einsum_407 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_136, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_136 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_540, + split_541, + ) = einsum_406 + del einsum_406 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_542, + split_543, + ) = einsum_407 + del einsum_407 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_137 = [cast_5, einsum_405] + del einsum_405 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_408, einsum_409, einsum_410 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_137, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_137 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_544, + split_545, + ) = einsum_409 + del einsum_409 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_546, + split_547, + ) = einsum_410 + del einsum_410 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_202 = paddle._C_ops.add(einsum_399, index_select_22) + del einsum_399, index_select_22 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_203 = paddle._C_ops.add(add_202, einsum_408) + del add_202, einsum_408 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_26 = paddle._C_ops.scale(add_203, full_16, float("0"), True) + del add_203 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_22 = paddle._C_ops.subtract(scale_26, scale_4) + del scale_26 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_22 = paddle._C_ops.softmax(subtract_22, 3) + del subtract_22 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_180, dropout_181 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_22, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_22 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_138 = [dropout_180, reshape_156] + del dropout_180, reshape_156 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_411, einsum_412, einsum_413 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_138, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_138 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_548, + split_549, + ) = einsum_412 + del einsum_412 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_550, + split_551, + ) = einsum_413 + del einsum_413 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(einsum_411, full_int_array_10) + del einsum_411 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_139 = [reshape_160, parameter_30] + del parameter_30, reshape_160 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_414, einsum_415, einsum_416 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_139, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_139 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_552, + split_553, + ) = einsum_415 + del einsum_415 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_554, + split_555, + ) = einsum_416 + del einsum_416 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_182, dropout_183 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_414, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_414 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_204 = paddle._C_ops.add(dropout_182, layer_norm_129) + del dropout_182, layer_norm_129 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_204, parameter_24, parameter_23, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_204, parameter_23, parameter_24 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_136 = paddle._C_ops.matmul(layer_norm_132, parameter_20, False, False) + del parameter_20 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_205 = paddle._C_ops.add(matmul_136, parameter_19) + del matmul_136, parameter_19 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_22 = paddle._C_ops.gelu(add_205, False) + del add_205 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_184, dropout_185 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_22, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_22 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_137 = paddle._C_ops.matmul(dropout_184, parameter_18, False, False) + del dropout_184, parameter_18 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_206 = paddle._C_ops.add(matmul_137, parameter_17) + del matmul_137, parameter_17 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_186, dropout_187 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_206, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_206 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_207 = paddle._C_ops.add(dropout_186, layer_norm_132) + del dropout_186, layer_norm_132 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_207, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_207, parameter_21, parameter_22 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_138 = paddle._C_ops.matmul(layer_norm_135, parameter_16, False, False) + del parameter_16 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_161 = paddle._C_ops.reshape(matmul_138, full_int_array_5) + del matmul_138 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_139 = paddle._C_ops.matmul(layer_norm_135, parameter_15, False, False) + del parameter_15 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_162 = paddle._C_ops.reshape(matmul_139, full_int_array_5) + del matmul_139 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x1024xf32, 1024x1024xf32) + matmul_140 = paddle._C_ops.matmul(layer_norm_135, parameter_14, False, False) + del parameter_14 + + # pd_op.reshape: (22x1x16x64xf32) <- (22x1x1024xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(matmul_140, full_int_array_5) + del full_int_array_5, matmul_140 + + # pd_op.matmul: (44x1x1024xf32) <- (44x1x1024xf32, 1024x1024xf32) + matmul_141 = paddle._C_ops.matmul(dropout_2, parameter_12, False, False) + del dropout_2, parameter_12 + + # pd_op.reshape: (44x1x16x64xf32) <- (44x1x1024xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(matmul_141, full_int_array_6) + del full_int_array_6, matmul_141 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_208 = paddle._C_ops.add(reshape_161, parameter_9) + del parameter_9 + + # builtin.combine: ([22x1x16x64xf32, 22x1x16x64xf32]) <- (22x1x16x64xf32, 22x1x16x64xf32) + combine_140 = [add_208, reshape_162] + del add_208, reshape_162 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x1x16x64xf32, 22x1x16x64xf32]) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + einsum_417, einsum_418, einsum_419 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_140, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_140 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_556, + split_557, + ) = einsum_418 + del einsum_418 + + # builtin.split: (22x1x16x64xf32, 22x1x16x64xf32) <- ([22x1x16x64xf32, 22x1x16x64xf32]) + ( + split_558, + split_559, + ) = einsum_419 + del einsum_419 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_209 = paddle._C_ops.add(reshape_161, parameter_11) + del parameter_11 + + # builtin.combine: ([22x1x16x64xf32, 44x1x16x64xf32]) <- (22x1x16x64xf32, 44x1x16x64xf32) + combine_141 = [add_209, reshape_164] + del add_209, reshape_164 + + # pd_op.einsum: (1x16x22x44xf32, [0xf32, 0xf32], [22x1x16x64xf32, 44x1x16x64xf32]) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + einsum_420, einsum_421, einsum_422 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_141, "ibnd,jbnd->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_141 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_560, + split_561, + ) = einsum_421 + del einsum_421 + + # builtin.split: (22x1x16x64xf32, 44x1x16x64xf32) <- ([22x1x16x64xf32, 44x1x16x64xf32]) + ( + split_562, + split_563, + ) = einsum_422 + del einsum_422 + + # pd_op.reshape: (1x16x44x22xf32) <- (1x16x22x44xf32, 4xi64) + reshape_165 = paddle._C_ops.reshape(einsum_420, full_int_array_7) + del einsum_420, full_int_array_7 + + # pd_op.slice: (1x16x43x22xf32) <- (1x16x44x22xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + reshape_165, [2], full_int_array_3, full_int_array_8, [1], [] + ) + del full_int_array_3, full_int_array_8, reshape_165 + + # pd_op.reshape: (1x16x22x43xf32) <- (1x16x43x22xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(slice_23, full_int_array_9) + del full_int_array_9, slice_23 + + # pd_op.index_select: (1x16x22x22xf32) <- (1x16x22x43xf32, 22xi64) + index_select_23 = paddle._C_ops.index_select(reshape_166, arange_2, 3) + del arange_2, reshape_166 + + # pd_op.add: (22x1x16x64xf32) <- (22x1x16x64xf32, 16x64xf32) + add_210 = paddle._C_ops.add(reshape_161, parameter_10) + del parameter_10, reshape_161 + + # builtin.combine: ([22x1x16x64xf32, 2x16x64xf32]) <- (22x1x16x64xf32, 2x16x64xf32) + combine_142 = [add_210, parameter_8] + del add_210, parameter_8 + + # pd_op.einsum: (22x1x16x2xf32, [0xf32, 0xf32], [22x1x16x64xf32, 2x16x64xf32]) <- ([22x1x16x64xf32, 2x16x64xf32]) + einsum_423, einsum_424, einsum_425 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_142, "ibnd,snd->ibns"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_142 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_564, + split_565, + ) = einsum_424 + del einsum_424 + + # builtin.split: (22x1x16x64xf32, 2x16x64xf32) <- ([22x1x16x64xf32, 2x16x64xf32]) + ( + split_566, + split_567, + ) = einsum_425 + del einsum_425 + + # builtin.combine: ([22x22x1x2xf32, 22x1x16x2xf32]) <- (22x22x1x2xf32, 22x1x16x2xf32) + combine_143 = [cast_5, einsum_423] + del cast_5, einsum_423 + + # pd_op.einsum: (1x16x22x22xf32, [0xf32, 0xf32], [22x22x1x2xf32, 22x1x16x2xf32]) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + einsum_426, einsum_427, einsum_428 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_143, "ijbs,ibns->bnij"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_143 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_568, + split_569, + ) = einsum_427 + del einsum_427 + + # builtin.split: (22x22x1x2xf32, 22x1x16x2xf32) <- ([22x22x1x2xf32, 22x1x16x2xf32]) + ( + split_570, + split_571, + ) = einsum_428 + del einsum_428 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_211 = paddle._C_ops.add(einsum_417, index_select_23) + del einsum_417, index_select_23 + + # pd_op.add: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x16x22x22xf32) + add_212 = paddle._C_ops.add(add_211, einsum_426) + del add_211, einsum_426 + + # pd_op.scale: (1x16x22x22xf32) <- (1x16x22x22xf32, 1xf32) + scale_27 = paddle._C_ops.scale(add_212, full_16, float("0"), True) + del add_212, full_16 + + # pd_op.subtract: (1x16x22x22xf32) <- (1x16x22x22xf32, 1x1x22x22xf32) + subtract_23 = paddle._C_ops.subtract(scale_27, scale_4) + del scale_27, scale_4 + + # pd_op.softmax: (1x16x22x22xf32) <- (1x16x22x22xf32) + softmax_23 = paddle._C_ops.softmax(subtract_23, 3) + del subtract_23 + + # pd_op.dropout: (1x16x22x22xf32, 1x16x22x22xui8) <- (1x16x22x22xf32, None, 1xf32) + dropout_188, dropout_189 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_23 + + # builtin.combine: ([1x16x22x22xf32, 22x1x16x64xf32]) <- (1x16x22x22xf32, 22x1x16x64xf32) + combine_144 = [dropout_188, reshape_163] + del dropout_188, reshape_163 + + # pd_op.einsum: (22x1x16x64xf32, [0xf32, 0xf32], [1x16x22x22xf32, 22x1x16x64xf32]) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + einsum_429, einsum_430, einsum_431 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_144, "bnij,jbnd->ibnd"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_144 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_572, + split_573, + ) = einsum_430 + del einsum_430 + + # builtin.split: (1x16x22x22xf32, 22x1x16x64xf32) <- ([1x16x22x22xf32, 22x1x16x64xf32]) + ( + split_574, + split_575, + ) = einsum_431 + del einsum_431 + + # pd_op.reshape: (22x1x1024xf32) <- (22x1x16x64xf32, 3xi64) + reshape_167 = paddle._C_ops.reshape(einsum_429, full_int_array_10) + del einsum_429, full_int_array_10 + + # builtin.combine: ([22x1x1024xf32, 1024x1024xf32]) <- (22x1x1024xf32, 1024x1024xf32) + combine_145 = [reshape_167, parameter_13] + del parameter_13, reshape_167 + + # pd_op.einsum: (22x1x1024xf32, [0xf32, 0xf32], [22x1x1024xf32, 1024x1024xf32]) <- ([22x1x1024xf32, 1024x1024xf32]) + einsum_432, einsum_433, einsum_434 = (lambda x, f: f(x))( + paddle._C_ops.einsum(combine_145, "ibm,hm->ibh"), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del combine_145 + + # builtin.split: (0xf32, 0xf32) <- ([0xf32, 0xf32]) + ( + split_576, + split_577, + ) = einsum_433 + del einsum_433 + + # builtin.split: (22x1x1024xf32, 1024x1024xf32) <- ([22x1x1024xf32, 1024x1024xf32]) + ( + split_578, + split_579, + ) = einsum_434 + del einsum_434 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_190, dropout_191 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + einsum_432, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del einsum_432 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_213 = paddle._C_ops.add(dropout_190, layer_norm_135) + del dropout_190, layer_norm_135 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_213, parameter_7, parameter_6, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_213, parameter_6, parameter_7 + + # pd_op.matmul: (22x1x4096xf32) <- (22x1x1024xf32, 1024x4096xf32) + matmul_142 = paddle._C_ops.matmul(layer_norm_138, parameter_3, False, False) + del parameter_3 + + # pd_op.add: (22x1x4096xf32) <- (22x1x4096xf32, 4096xf32) + add_214 = paddle._C_ops.add(matmul_142, parameter_2) + del matmul_142, parameter_2 + + # pd_op.gelu: (22x1x4096xf32) <- (22x1x4096xf32) + gelu_23 = paddle._C_ops.gelu(add_214, False) + del add_214 + + # pd_op.dropout: (22x1x4096xf32, 22x1x4096xui8) <- (22x1x4096xf32, None, 1xf32) + dropout_192, dropout_193 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + gelu_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del gelu_23 + + # pd_op.matmul: (22x1x1024xf32) <- (22x1x4096xf32, 4096x1024xf32) + matmul_143 = paddle._C_ops.matmul(dropout_192, parameter_1, False, False) + del dropout_192, parameter_1 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 1024xf32) + add_215 = paddle._C_ops.add(matmul_143, parameter_0) + del matmul_143, parameter_0 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_194, dropout_195 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_215, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_215 + + # pd_op.add: (22x1x1024xf32) <- (22x1x1024xf32, 22x1x1024xf32) + add_216 = paddle._C_ops.add(dropout_194, layer_norm_138) + del dropout_194, layer_norm_138 + + # pd_op.layer_norm: (22x1x1024xf32, 22x1xf32, 22x1xf32) <- (22x1x1024xf32, 1024xf32, 1024xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_216, parameter_5, parameter_4, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_216, parameter_4, parameter_5 + + # pd_op.dropout: (22x1x1024xf32, 22x1x1024xui8) <- (22x1x1024xf32, None, 1xf32) + dropout_196, dropout_197 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_141, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_3, layer_norm_141 + + # pd_op.transpose: (1x22x1024xf32) <- (22x1x1024xf32) + transpose_0 = paddle._C_ops.transpose(dropout_196, [1, 0, 2]) + del dropout_196 + + return transpose_0 diff --git a/paddle_samples/PaddleNLP/xlnet-large-cased/weight_meta.py b/paddle_samples/PaddleNLP/xlnet-large-cased/weight_meta.py new file mode 100644 index 000000000..8b4095e62 --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-large-cased/weight_meta.py @@ -0,0 +1,4076 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.106054") + max_val = float("0.0960214") + mean = float("-1.15851e-05") + std = float("0.0200029") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.10347") + max_val = float("0.108275") + mean = float("4.60236e-07") + std = float("0.0200048") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0629102") + max_val = float("0.0841791") + mean = float("0.00041478") + std = float("0.0199864") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0638675") + max_val = float("0.0699875") + mean = float("-6.22555e-05") + std = float("0.0203062") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0627365") + max_val = float("0.0704744") + mean = float("0.00031257") + std = float("0.0198551") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0653019") + max_val = float("0.0567174") + mean = float("0.000134436") + std = float("0.0194034") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0946227") + max_val = float("0.109461") + mean = float("-9.10193e-06") + std = float("0.0200007") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.10073") + max_val = float("0.0982617") + mean = float("2.79908e-05") + std = float("0.0199934") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0949046") + max_val = float("0.0942779") + mean = float("-6.38509e-06") + std = float("0.0199833") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.109448") + max_val = float("0.0951698") + mean = float("-1.58499e-05") + std = float("0.0200073") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.106196") + max_val = float("0.103671") + mean = float("-2.10734e-05") + std = float("0.0200118") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0968363") + max_val = float("0.112204") + mean = float("4.902e-06") + std = float("0.0199906") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.117731") + max_val = float("0.102028") + mean = float("1.14172e-05") + std = float("0.0200039") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0727356") + max_val = float("0.0609156") + mean = float("-0.00057746") + std = float("0.0200492") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0661265") + max_val = float("0.0651363") + mean = float("0.000445962") + std = float("0.0203104") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0633563") + max_val = float("0.0703257") + mean = float("0.000476403") + std = float("0.0198287") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0603359") + max_val = float("0.0811232") + mean = float("-0.000254285") + std = float("0.0196772") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0982637") + max_val = float("0.0952011") + mean = float("3.30818e-05") + std = float("0.0200183") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0940764") + max_val = float("0.0988642") + mean = float("2.51877e-05") + std = float("0.019996") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100339") + max_val = float("0.0961819") + mean = float("2.35292e-05") + std = float("0.0200088") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0955299") + max_val = float("0.101229") + mean = float("2.87225e-05") + std = float("0.0199959") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0997276") + max_val = float("0.0980562") + mean = float("1.44465e-05") + std = float("0.0199991") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0997595") + max_val = float("0.102574") + mean = float("-1.85995e-05") + std = float("0.0199962") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.114702") + max_val = float("0.098215") + mean = float("7.8673e-06") + std = float("0.0199983") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0717736") + max_val = float("0.0634031") + mean = float("0.000564224") + std = float("0.020665") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0561762") + max_val = float("0.0550648") + mean = float("-0.00130685") + std = float("0.0204015") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0634076") + max_val = float("0.072255") + mean = float("-0.000885043") + std = float("0.0197112") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0707521") + max_val = float("0.0694411") + mean = float("8.53844e-05") + std = float("0.0199395") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0967653") + max_val = float("0.103006") + mean = float("5.69153e-06") + std = float("0.0200125") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0942901") + max_val = float("0.0982401") + mean = float("-2.33385e-05") + std = float("0.0200236") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0997957") + max_val = float("0.101875") + mean = float("-2.06269e-05") + std = float("0.0199977") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0967376") + max_val = float("0.0937413") + mean = float("2.44117e-07") + std = float("0.0199954") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0995889") + max_val = float("0.0924654") + mean = float("1.09581e-05") + std = float("0.0200201") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.106136") + max_val = float("0.110985") + mean = float("-3.1386e-07") + std = float("0.020002") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.10291") + max_val = float("0.102207") + mean = float("2.15542e-06") + std = float("0.0199973") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0759966") + max_val = float("0.0802708") + mean = float("-5.57094e-06") + std = float("0.0202897") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0748273") + max_val = float("0.0596886") + mean = float("0.000434476") + std = float("0.0202084") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0710389") + max_val = float("0.0645265") + mean = float("-0.000133568") + std = float("0.0199987") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0611757") + max_val = float("0.0547125") + mean = float("-0.000343651") + std = float("0.0193835") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0947259") + max_val = float("0.096012") + mean = float("-7.68183e-06") + std = float("0.0199989") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.105741") + max_val = float("0.0998432") + mean = float("-2.21765e-05") + std = float("0.0199938") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0974652") + max_val = float("0.110738") + mean = float("-2.11843e-05") + std = float("0.0200148") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0902099") + max_val = float("0.0994524") + mean = float("-1.02443e-05") + std = float("0.0200169") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0909302") + max_val = float("0.0948929") + mean = float("4.27818e-07") + std = float("0.0199923") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0983985") + max_val = float("0.104909") + mean = float("-1.20501e-05") + std = float("0.0199983") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.102897") + max_val = float("0.100584") + mean = float("6.06252e-06") + std = float("0.019999") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0824933") + max_val = float("0.059739") + mean = float("0.00019929") + std = float("0.0200107") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0567373") + max_val = float("0.0523855") + mean = float("0.000189737") + std = float("0.0195964") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0576826") + max_val = float("0.0661385") + mean = float("2.54318e-05") + std = float("0.0194682") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0627012") + max_val = float("0.0644959") + mean = float("0.000415049") + std = float("0.0203708") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0987199") + max_val = float("0.0952045") + mean = float("-6.68385e-06") + std = float("0.019987") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101537") + max_val = float("0.109131") + mean = float("1.96553e-05") + std = float("0.0199889") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0934779") + max_val = float("0.0967221") + mean = float("1.3731e-06") + std = float("0.0200065") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0904465") + max_val = float("0.113637") + mean = float("1.9164e-05") + std = float("0.02") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0973789") + max_val = float("0.0949498") + mean = float("2.27422e-06") + std = float("0.0200016") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.100945") + max_val = float("0.100593") + mean = float("-8.58709e-06") + std = float("0.0199981") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.104066") + max_val = float("0.0972485") + mean = float("1.7527e-06") + std = float("0.0199909") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0681122") + max_val = float("0.062478") + mean = float("-0.000147028") + std = float("0.0197177") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0642083") + max_val = float("0.0556058") + mean = float("-0.000402527") + std = float("0.0194766") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0667576") + max_val = float("0.0689737") + mean = float("-0.000169207") + std = float("0.0206662") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0613529") + max_val = float("0.0688503") + mean = float("0.00110047") + std = float("0.0193664") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0947994") + max_val = float("0.105075") + mean = float("3.53332e-06") + std = float("0.0200105") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0946804") + max_val = float("0.0998394") + mean = float("1.64733e-05") + std = float("0.0199982") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101529") + max_val = float("0.0913024") + mean = float("-1.53374e-05") + std = float("0.020014") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101046") + max_val = float("0.0985934") + mean = float("-1.94784e-05") + std = float("0.0199928") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0961183") + max_val = float("0.0941208") + mean = float("2.76461e-05") + std = float("0.0199907") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0962846") + max_val = float("0.107709") + mean = float("-1.81282e-06") + std = float("0.0200036") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.103463") + max_val = float("0.104378") + mean = float("-3.97845e-06") + std = float("0.0200086") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0714291") + max_val = float("0.0682799") + mean = float("0.000820523") + std = float("0.0193262") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0645135") + max_val = float("0.0605717") + mean = float("0.000640813") + std = float("0.0202424") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.070288") + max_val = float("0.0575892") + mean = float("8.90004e-05") + std = float("0.020105") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0616152") + max_val = float("0.0581605") + mean = float("0.000435356") + std = float("0.0196566") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0893861") + max_val = float("0.09287") + mean = float("1.0929e-05") + std = float("0.0199854") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0974691") + max_val = float("0.0946894") + mean = float("-1.60171e-05") + std = float("0.0200062") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0987745") + max_val = float("0.0941207") + mean = float("1.19255e-05") + std = float("0.0199877") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0994819") + max_val = float("0.106583") + mean = float("3.07369e-05") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0924397") + max_val = float("0.0972573") + mean = float("-4.57201e-05") + std = float("0.0200062") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101136") + max_val = float("0.1") + mean = float("-9.05641e-07") + std = float("0.0199949") + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100887") + max_val = float("0.103692") + mean = float("-4.19737e-06") + std = float("0.0199957") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.066387") + max_val = float("0.0768451") + mean = float("0.000152864") + std = float("0.0198357") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0633506") + max_val = float("0.0607709") + mean = float("-0.000676489") + std = float("0.0198584") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0631143") + max_val = float("0.0591226") + mean = float("-0.00120845") + std = float("0.0198463") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0591492") + max_val = float("0.057425") + mean = float("0.000858639") + std = float("0.0197628") + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.095505") + max_val = float("0.0974372") + mean = float("-1.35853e-05") + std = float("0.0199879") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0911795") + max_val = float("0.103497") + mean = float("-1.04017e-05") + std = float("0.0200005") + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0916454") + max_val = float("0.101501") + mean = float("6.3882e-06") + std = float("0.0199991") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0879144") + max_val = float("0.0924113") + mean = float("1.04034e-05") + std = float("0.0199618") + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0979216") + max_val = float("0.098031") + mean = float("-6.51493e-06") + std = float("0.0200191") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0977452") + max_val = float("0.100121") + mean = float("-1.08976e-06") + std = float("0.0200082") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.104211") + max_val = float("0.102536") + mean = float("1.01295e-05") + std = float("0.0200073") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0722138") + max_val = float("0.0803455") + mean = float("0.000762837") + std = float("0.0197865") + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.056961") + max_val = float("0.0715826") + mean = float("-0.000786657") + std = float("0.0194175") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0676472") + max_val = float("0.0681574") + mean = float("-0.000424236") + std = float("0.0204415") + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0631112") + max_val = float("0.0610452") + mean = float("-0.000438692") + std = float("0.0205049") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.097165") + max_val = float("0.0994696") + mean = float("1.57879e-06") + std = float("0.0199953") + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.093298") + max_val = float("0.096159") + mean = float("-6.33271e-06") + std = float("0.0200232") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100076") + max_val = float("0.0885343") + mean = float("1.44574e-05") + std = float("0.0199766") + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0925985") + max_val = float("0.09827") + mean = float("7.36023e-06") + std = float("0.0199871") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0949021") + max_val = float("0.106815") + mean = float("-1.50847e-07") + std = float("0.020004") + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.108906") + max_val = float("0.0980955") + mean = float("9.04024e-06") + std = float("0.0200008") + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101134") + max_val = float("0.110153") + mean = float("-8.8502e-06") + std = float("0.0199973") + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0840635") + max_val = float("0.0735396") + mean = float("-0.00035731") + std = float("0.020525") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0694478") + max_val = float("0.0557208") + mean = float("0.000542685") + std = float("0.0191024") + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0774371") + max_val = float("0.0614744") + mean = float("-0.000672589") + std = float("0.0203074") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.068261") + max_val = float("0.0740985") + mean = float("1.77945e-05") + std = float("0.0200567") + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0957754") + max_val = float("0.103741") + mean = float("-9.28956e-06") + std = float("0.0200265") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0909206") + max_val = float("0.0919583") + mean = float("-9.37085e-07") + std = float("0.0200099") + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101895") + max_val = float("0.103712") + mean = float("-3.31451e-05") + std = float("0.0200209") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0914984") + max_val = float("0.0924312") + mean = float("2.24992e-05") + std = float("0.0199888") + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.117054") + max_val = float("0.0974593") + mean = float("7.09017e-06") + std = float("0.0200082") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.104693") + max_val = float("0.109077") + mean = float("1.28089e-06") + std = float("0.0200104") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0992847") + max_val = float("0.0990478") + mean = float("-6.41955e-06") + std = float("0.0199923") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0634189") + max_val = float("0.0753831") + mean = float("0.000107635") + std = float("0.0196634") + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0605175") + max_val = float("0.0513971") + mean = float("-0.000729531") + std = float("0.0196666") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0586949") + max_val = float("0.0657678") + mean = float("-0.000752592") + std = float("0.020432") + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0692708") + max_val = float("0.0596413") + mean = float("-0.000465153") + std = float("0.0199913") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0931099") + max_val = float("0.0954757") + mean = float("-5.31989e-06") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0940111") + max_val = float("0.0973873") + mean = float("-2.33031e-05") + std = float("0.0199905") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0938948") + max_val = float("0.0979914") + mean = float("-6.45665e-06") + std = float("0.020008") + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0941754") + max_val = float("0.0935313") + mean = float("-1.9268e-05") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0908799") + max_val = float("0.0943659") + mean = float("5.38934e-06") + std = float("0.0200054") + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.10681") + max_val = float("0.107106") + mean = float("-5.9112e-06") + std = float("0.0200055") + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100362") + max_val = float("0.102567") + mean = float("3.15704e-06") + std = float("0.0200096") + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.066251") + max_val = float("0.0656874") + mean = float("0.00064487") + std = float("0.0199795") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0616622") + max_val = float("0.0572829") + mean = float("0.00029421") + std = float("0.0202689") + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0663683") + max_val = float("0.0598881") + mean = float("0.000626146") + std = float("0.0199059") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.059461") + max_val = float("0.0599763") + mean = float("0.000392374") + std = float("0.0192713") + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0920732") + max_val = float("0.0965993") + mean = float("1.22806e-05") + std = float("0.0199985") + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0935352") + max_val = float("0.0969375") + mean = float("-1.34561e-05") + std = float("0.0200127") + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0976724") + max_val = float("0.0972049") + mean = float("2.25131e-05") + std = float("0.0200015") + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0971643") + max_val = float("0.116639") + mean = float("4.43747e-06") + std = float("0.0199986") + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0941074") + max_val = float("0.108684") + mean = float("1.62897e-05") + std = float("0.020005") + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101354") + max_val = float("0.112926") + mean = float("-9.28535e-06") + std = float("0.0199946") + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100693") + max_val = float("0.101043") + mean = float("9.13948e-06") + std = float("0.0200057") + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0762107") + max_val = float("0.06625") + mean = float("0.000400155") + std = float("0.0202677") + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0901191") + max_val = float("0.0675753") + mean = float("0.00117218") + std = float("0.02013") + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0612773") + max_val = float("0.0535311") + mean = float("-0.000230495") + std = float("0.020225") + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0577393") + max_val = float("0.062089") + mean = float("2.74119e-05") + std = float("0.0204406") + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0978597") + max_val = float("0.0916914") + mean = float("2.58244e-06") + std = float("0.0199936") + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0934566") + max_val = float("0.0899418") + mean = float("-3.3187e-05") + std = float("0.02") + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100767") + max_val = float("0.0982188") + mean = float("1.2552e-05") + std = float("0.0200214") + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0901681") + max_val = float("0.110304") + mean = float("-7.9722e-06") + std = float("0.0200046") + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0922998") + max_val = float("0.101705") + mean = float("-1.73963e-05") + std = float("0.0199696") + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.1041") + max_val = float("0.107634") + mean = float("9.2561e-06") + std = float("0.0199911") + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100772") + max_val = float("0.102886") + mean = float("-2.07023e-05") + std = float("0.0200025") + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0757654") + max_val = float("0.0701599") + mean = float("-0.000318248") + std = float("0.019568") + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0698035") + max_val = float("0.0679004") + mean = float("0.000979192") + std = float("0.0197564") + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.059353") + max_val = float("0.067176") + mean = float("2.78118e-05") + std = float("0.0196992") + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0697431") + max_val = float("0.0563663") + mean = float("0.000152303") + std = float("0.0199866") + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100614") + max_val = float("0.0912606") + mean = float("2.82769e-06") + std = float("0.0199952") + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0963025") + max_val = float("0.0936212") + mean = float("2.50608e-05") + std = float("0.020014") + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0964591") + max_val = float("0.0953656") + mean = float("4.43783e-06") + std = float("0.0199904") + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0994844") + max_val = float("0.100083") + mean = float("4.20366e-06") + std = float("0.0200082") + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0990775") + max_val = float("0.0973748") + mean = float("3.93006e-05") + std = float("0.0200132") + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0962434") + max_val = float("0.09773") + mean = float("2.75828e-06") + std = float("0.0200034") + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.10718") + max_val = float("0.0996396") + mean = float("1.37343e-05") + std = float("0.0200021") + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0572005") + max_val = float("0.0597119") + mean = float("-0.000229193") + std = float("0.0197843") + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0633685") + max_val = float("0.0622368") + mean = float("0.000740755") + std = float("0.0203714") + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0584085") + max_val = float("0.0657137") + mean = float("-0.000159375") + std = float("0.0202724") + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0803327") + max_val = float("0.0654359") + mean = float("-4.39397e-05") + std = float("0.0192414") + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0943897") + max_val = float("0.0953662") + mean = float("9.25647e-06") + std = float("0.0200079") + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100952") + max_val = float("0.0902493") + mean = float("-1.61662e-05") + std = float("0.019998") + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0941652") + max_val = float("0.0981683") + mean = float("-4.68069e-05") + std = float("0.0200021") + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0961579") + max_val = float("0.0861866") + mean = float("3.35187e-05") + std = float("0.0200144") + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.107999") + max_val = float("0.0947114") + mean = float("-7.68855e-07") + std = float("0.0199972") + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.102002") + max_val = float("0.100289") + mean = float("8.46317e-07") + std = float("0.0199958") + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0942694") + max_val = float("0.106696") + mean = float("-1.24755e-05") + std = float("0.0199964") + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0808637") + max_val = float("0.0661098") + mean = float("-0.000817654") + std = float("0.0201087") + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0648078") + max_val = float("0.0631008") + mean = float("-0.00095584") + std = float("0.0200808") + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0644025") + max_val = float("0.0644977") + mean = float("0.000808763") + std = float("0.0205874") + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0623088") + max_val = float("0.064596") + mean = float("-0.000765763") + std = float("0.0200499") + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.10159") + max_val = float("0.103478") + mean = float("4.39358e-05") + std = float("0.0199973") + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0902865") + max_val = float("0.0975484") + mean = float("-1.29312e-05") + std = float("0.0199991") + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0967346") + max_val = float("0.0973734") + mean = float("-2.19967e-05") + std = float("0.0200175") + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0959548") + max_val = float("0.108116") + mean = float("-3.80651e-05") + std = float("0.0199919") + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0958974") + max_val = float("0.0982373") + mean = float("2.84582e-06") + std = float("0.020007") + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.100233") + max_val = float("0.10353") + mean = float("1.35232e-05") + std = float("0.0200085") + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0984514") + max_val = float("0.103702") + mean = float("2.04813e-06") + std = float("0.0200084") + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0625527") + max_val = float("0.067685") + mean = float("-7.07414e-05") + std = float("0.019375") + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0639338") + max_val = float("0.0624786") + mean = float("0.00068492") + std = float("0.0202815") + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0621922") + max_val = float("0.0678433") + mean = float("-0.000639305") + std = float("0.0202925") + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0639842") + max_val = float("0.0744252") + mean = float("0.000404183") + std = float("0.0197421") + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0925419") + max_val = float("0.0927665") + mean = float("3.65188e-06") + std = float("0.0200096") + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101495") + max_val = float("0.0973781") + mean = float("1.73695e-05") + std = float("0.0199796") + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0978203") + max_val = float("0.0972203") + mean = float("-1.14079e-05") + std = float("0.0200017") + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0956221") + max_val = float("0.104711") + mean = float("1.37487e-05") + std = float("0.0200021") + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0915618") + max_val = float("0.108204") + mean = float("2.93813e-05") + std = float("0.0200081") + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.114183") + max_val = float("0.104422") + mean = float("7.3287e-06") + std = float("0.0199976") + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101349") + max_val = float("0.10013") + mean = float("7.6001e-07") + std = float("0.0199979") + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0645872") + max_val = float("0.0716855") + mean = float("-0.000495045") + std = float("0.0195812") + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0611582") + max_val = float("0.0727321") + mean = float("-0.000392967") + std = float("0.0201977") + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0581971") + max_val = float("0.0598608") + mean = float("0.000397432") + std = float("0.0191885") + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0564572") + max_val = float("0.073037") + mean = float("0.00153765") + std = float("0.0206363") + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0962484") + max_val = float("0.0942868") + mean = float("8.79621e-06") + std = float("0.0200071") + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0956998") + max_val = float("0.0942656") + mean = float("-3.91776e-06") + std = float("0.019995") + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0909612") + max_val = float("0.105077") + mean = float("4.49877e-05") + std = float("0.0199845") + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.105015") + max_val = float("0.0947171") + mean = float("-1.10435e-05") + std = float("0.0200096") + data = None + + +class Program_weight_tensor_parameter_305: + name = "parameter_305" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100484") + max_val = float("0.098835") + mean = float("-1.65579e-05") + std = float("0.0200025") + data = None + + +class Program_weight_tensor_parameter_306: + name = "parameter_306" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_307: + name = "parameter_307" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.110146") + max_val = float("0.109618") + mean = float("7.97491e-06") + std = float("0.0199906") + data = None + + +class Program_weight_tensor_parameter_308: + name = "parameter_308" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_309: + name = "parameter_309" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101873") + max_val = float("0.105303") + mean = float("5.83982e-06") + std = float("0.0199958") + data = None + + +class Program_weight_tensor_parameter_310: + name = "parameter_310" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_311: + name = "parameter_311" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_312: + name = "parameter_312" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_313: + name = "parameter_313" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_314: + name = "parameter_314" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0833993") + max_val = float("0.0664246") + mean = float("-0.000166527") + std = float("0.0204661") + data = None + + +class Program_weight_tensor_parameter_315: + name = "parameter_315" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0728898") + max_val = float("0.0616484") + mean = float("-0.000625521") + std = float("0.0198375") + data = None + + +class Program_weight_tensor_parameter_316: + name = "parameter_316" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0652227") + max_val = float("0.0543112") + mean = float("0.00134297") + std = float("0.0200099") + data = None + + +class Program_weight_tensor_parameter_317: + name = "parameter_317" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0576651") + max_val = float("0.0620159") + mean = float("-0.000763434") + std = float("0.0200541") + data = None + + +class Program_weight_tensor_parameter_318: + name = "parameter_318" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0959107") + max_val = float("0.097618") + mean = float("1.68048e-05") + std = float("0.0199765") + data = None + + +class Program_weight_tensor_parameter_319: + name = "parameter_319" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.102617") + max_val = float("0.102757") + mean = float("-2.87196e-06") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_320: + name = "parameter_320" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0937724") + max_val = float("0.100478") + mean = float("-4.96675e-08") + std = float("0.0200332") + data = None + + +class Program_weight_tensor_parameter_321: + name = "parameter_321" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0923792") + max_val = float("0.0992109") + mean = float("-1.49416e-05") + std = float("0.0200146") + data = None + + +class Program_weight_tensor_parameter_322: + name = "parameter_322" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0942468") + max_val = float("0.0981391") + mean = float("-1.31399e-05") + std = float("0.0200129") + data = None + + +class Program_weight_tensor_parameter_323: + name = "parameter_323" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_324: + name = "parameter_324" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0986392") + max_val = float("0.0974826") + mean = float("1.40878e-05") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_325: + name = "parameter_325" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_326: + name = "parameter_326" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0987421") + max_val = float("0.100351") + mean = float("-3.03125e-06") + std = float("0.0199945") + data = None + + +class Program_weight_tensor_parameter_327: + name = "parameter_327" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_328: + name = "parameter_328" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_329: + name = "parameter_329" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_330: + name = "parameter_330" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_331: + name = "parameter_331" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0775364") + max_val = float("0.0678114") + mean = float("-0.000527159") + std = float("0.0202819") + data = None + + +class Program_weight_tensor_parameter_332: + name = "parameter_332" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0583356") + max_val = float("0.0583862") + mean = float("-5.84847e-05") + std = float("0.0197946") + data = None + + +class Program_weight_tensor_parameter_333: + name = "parameter_333" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0783503") + max_val = float("0.0747464") + mean = float("-9.03913e-06") + std = float("0.020419") + data = None + + +class Program_weight_tensor_parameter_334: + name = "parameter_334" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0588559") + max_val = float("0.0589071") + mean = float("-0.000510391") + std = float("0.0195717") + data = None + + +class Program_weight_tensor_parameter_335: + name = "parameter_335" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0965073") + max_val = float("0.101522") + mean = float("5.3999e-06") + std = float("0.0200109") + data = None + + +class Program_weight_tensor_parameter_336: + name = "parameter_336" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0938622") + max_val = float("0.10364") + mean = float("-2.70096e-05") + std = float("0.0200286") + data = None + + +class Program_weight_tensor_parameter_337: + name = "parameter_337" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0904915") + max_val = float("0.0892064") + mean = float("5.79199e-06") + std = float("0.0200299") + data = None + + +class Program_weight_tensor_parameter_338: + name = "parameter_338" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0905588") + max_val = float("0.103688") + mean = float("-4.00367e-06") + std = float("0.0200005") + data = None + + +class Program_weight_tensor_parameter_339: + name = "parameter_339" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0945638") + max_val = float("0.0893655") + mean = float("-8.73476e-06") + std = float("0.0199973") + data = None + + +class Program_weight_tensor_parameter_340: + name = "parameter_340" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_341: + name = "parameter_341" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.10733") + max_val = float("0.106369") + mean = float("2.19368e-07") + std = float("0.0199976") + data = None + + +class Program_weight_tensor_parameter_342: + name = "parameter_342" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_343: + name = "parameter_343" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0957499") + max_val = float("0.115042") + mean = float("-9.20385e-08") + std = float("0.0200026") + data = None + + +class Program_weight_tensor_parameter_344: + name = "parameter_344" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_345: + name = "parameter_345" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_346: + name = "parameter_346" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_347: + name = "parameter_347" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_348: + name = "parameter_348" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0604663") + max_val = float("0.0795493") + mean = float("4.21371e-05") + std = float("0.0199298") + data = None + + +class Program_weight_tensor_parameter_349: + name = "parameter_349" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0630816") + max_val = float("0.0646421") + mean = float("0.0010257") + std = float("0.0197039") + data = None + + +class Program_weight_tensor_parameter_350: + name = "parameter_350" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0692722") + max_val = float("0.0626657") + mean = float("-0.000970121") + std = float("0.0204877") + data = None + + +class Program_weight_tensor_parameter_351: + name = "parameter_351" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0622362") + max_val = float("0.0625566") + mean = float("0.000481886") + std = float("0.0193751") + data = None + + +class Program_weight_tensor_parameter_352: + name = "parameter_352" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0945452") + max_val = float("0.0955453") + mean = float("-5.62408e-06") + std = float("0.019999") + data = None + + +class Program_weight_tensor_parameter_353: + name = "parameter_353" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.096623") + max_val = float("0.096632") + mean = float("-3.12025e-05") + std = float("0.0200064") + data = None + + +class Program_weight_tensor_parameter_354: + name = "parameter_354" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.104093") + max_val = float("0.10271") + mean = float("2.11826e-05") + std = float("0.0200002") + data = None + + +class Program_weight_tensor_parameter_355: + name = "parameter_355" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101074") + max_val = float("0.0974191") + mean = float("1.87963e-05") + std = float("0.0200046") + data = None + + +class Program_weight_tensor_parameter_356: + name = "parameter_356" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0949054") + max_val = float("0.0949776") + mean = float("3.0356e-05") + std = float("0.0200197") + data = None + + +class Program_weight_tensor_parameter_357: + name = "parameter_357" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_358: + name = "parameter_358" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0998529") + max_val = float("0.100143") + mean = float("-1.32428e-05") + std = float("0.0200092") + data = None + + +class Program_weight_tensor_parameter_359: + name = "parameter_359" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_360: + name = "parameter_360" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0979326") + max_val = float("0.0999568") + mean = float("-7.35727e-06") + std = float("0.0199956") + data = None + + +class Program_weight_tensor_parameter_361: + name = "parameter_361" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_362: + name = "parameter_362" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_363: + name = "parameter_363" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_364: + name = "parameter_364" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_365: + name = "parameter_365" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0653577") + max_val = float("0.0607269") + mean = float("6.57215e-05") + std = float("0.0197316") + data = None + + +class Program_weight_tensor_parameter_366: + name = "parameter_366" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0630385") + max_val = float("0.0739176") + mean = float("3.99468e-05") + std = float("0.0201011") + data = None + + +class Program_weight_tensor_parameter_367: + name = "parameter_367" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0548557") + max_val = float("0.0849987") + mean = float("-0.000277907") + std = float("0.0197348") + data = None + + +class Program_weight_tensor_parameter_368: + name = "parameter_368" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0676048") + max_val = float("0.0741839") + mean = float("-0.000614193") + std = float("0.0193521") + data = None + + +class Program_weight_tensor_parameter_369: + name = "parameter_369" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0882833") + max_val = float("0.100105") + mean = float("1.57479e-05") + std = float("0.0200117") + data = None + + +class Program_weight_tensor_parameter_370: + name = "parameter_370" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0898517") + max_val = float("0.101617") + mean = float("-2.55895e-05") + std = float("0.019994") + data = None + + +class Program_weight_tensor_parameter_371: + name = "parameter_371" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0981875") + max_val = float("0.097163") + mean = float("-1.07195e-05") + std = float("0.0199935") + data = None + + +class Program_weight_tensor_parameter_372: + name = "parameter_372" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101081") + max_val = float("0.0956739") + mean = float("2.08879e-05") + std = float("0.0200277") + data = None + + +class Program_weight_tensor_parameter_373: + name = "parameter_373" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0991906") + max_val = float("0.0970829") + mean = float("6.88477e-06") + std = float("0.0200219") + data = None + + +class Program_weight_tensor_parameter_374: + name = "parameter_374" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_375: + name = "parameter_375" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.115641") + max_val = float("0.098508") + mean = float("-6.77359e-06") + std = float("0.0199976") + data = None + + +class Program_weight_tensor_parameter_376: + name = "parameter_376" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_377: + name = "parameter_377" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0984785") + max_val = float("0.0997063") + mean = float("-8.42396e-07") + std = float("0.0200093") + data = None + + +class Program_weight_tensor_parameter_378: + name = "parameter_378" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_379: + name = "parameter_379" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_380: + name = "parameter_380" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_381: + name = "parameter_381" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_382: + name = "parameter_382" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0638253") + max_val = float("0.0755663") + mean = float("0.00015895") + std = float("0.0202998") + data = None + + +class Program_weight_tensor_parameter_383: + name = "parameter_383" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0589826") + max_val = float("0.0668031") + mean = float("0.000758693") + std = float("0.0198609") + data = None + + +class Program_weight_tensor_parameter_384: + name = "parameter_384" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.066861") + max_val = float("0.0602476") + mean = float("0.000222833") + std = float("0.0206746") + data = None + + +class Program_weight_tensor_parameter_385: + name = "parameter_385" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0601304") + max_val = float("0.0608498") + mean = float("-0.000389166") + std = float("0.0197741") + data = None + + +class Program_weight_tensor_parameter_386: + name = "parameter_386" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.095009") + max_val = float("0.107419") + mean = float("1.37709e-05") + std = float("0.020002") + data = None + + +class Program_weight_tensor_parameter_387: + name = "parameter_387" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103219") + max_val = float("0.0868753") + mean = float("1.44e-06") + std = float("0.0200017") + data = None + + +class Program_weight_tensor_parameter_388: + name = "parameter_388" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0906494") + max_val = float("0.095732") + mean = float("6.41013e-07") + std = float("0.0199844") + data = None + + +class Program_weight_tensor_parameter_389: + name = "parameter_389" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0943175") + max_val = float("0.0916165") + mean = float("-1.77288e-05") + std = float("0.0200087") + data = None + + +class Program_weight_tensor_parameter_390: + name = "parameter_390" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0939238") + max_val = float("0.0962935") + mean = float("-3.63531e-06") + std = float("0.0200132") + data = None + + +class Program_weight_tensor_parameter_391: + name = "parameter_391" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_392: + name = "parameter_392" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0974197") + max_val = float("0.104597") + mean = float("1.10199e-06") + std = float("0.0199984") + data = None + + +class Program_weight_tensor_parameter_393: + name = "parameter_393" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_394: + name = "parameter_394" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.123339") + max_val = float("0.106628") + mean = float("1.25406e-05") + std = float("0.0199984") + data = None + + +class Program_weight_tensor_parameter_395: + name = "parameter_395" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_396: + name = "parameter_396" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_397: + name = "parameter_397" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_398: + name = "parameter_398" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_399: + name = "parameter_399" + shape = [2, 16, 64] + dtype = "float32" + min_val = float("-0.0816646") + max_val = float("0.0571751") + mean = float("-0.00102561") + std = float("0.0201216") + data = None + + +class Program_weight_tensor_parameter_400: + name = "parameter_400" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0656832") + max_val = float("0.062296") + mean = float("-0.000532733") + std = float("0.0202617") + data = None + + +class Program_weight_tensor_parameter_401: + name = "parameter_401" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0586502") + max_val = float("0.0668614") + mean = float("0.000874736") + std = float("0.0200638") + data = None + + +class Program_weight_tensor_parameter_402: + name = "parameter_402" + shape = [16, 64] + dtype = "float32" + min_val = float("-0.0553704") + max_val = float("0.0684032") + mean = float("-0.000129585") + std = float("0.0194602") + data = None + + +class Program_weight_tensor_parameter_403: + name = "parameter_403" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0992038") + max_val = float("0.0909653") + mean = float("-1.80797e-05") + std = float("0.0199992") + data = None + + +class Program_weight_tensor_parameter_404: + name = "parameter_404" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.114171") + max_val = float("0.0952026") + mean = float("8.99988e-06") + std = float("0.0200014") + data = None + + +class Program_weight_tensor_parameter_405: + name = "parameter_405" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.09525") + max_val = float("0.100321") + mean = float("-2.06329e-05") + std = float("0.0199986") + data = None + + +class Program_weight_tensor_parameter_406: + name = "parameter_406" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0894399") + max_val = float("0.0966649") + mean = float("1.86653e-05") + std = float("0.019994") + data = None + + +class Program_weight_tensor_parameter_407: + name = "parameter_407" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.105839") + max_val = float("0.0950324") + mean = float("-1.29832e-05") + std = float("0.0200056") + data = None + + +class Program_weight_tensor_parameter_408: + name = "parameter_408" + shape = [32000, 1024] + dtype = "float32" + min_val = float("-0.109282") + max_val = float("0.10991") + mean = float("3.50928e-06") + std = float("0.0200011") + data = None + + +class Program_weight_tensor_parameter_409: + name = "parameter_409" + shape = [1, 1, 1024] + dtype = "float32" + min_val = float("-0.0618999") + max_val = float("0.0796384") + mean = float("-0.000848671") + std = float("0.0199154") + data = None From adadf101a0ba10240ea11a774a63fb2c25d33320 Mon Sep 17 00:00:00 2001 From: RbRe145 Date: Thu, 25 Sep 2025 07:55:47 +0000 Subject: [PATCH 3/4] add new albert and t5 models --- graph_net/test/nlp_model_getter.py | 90 + .../PaddleNLP/albert-base-v1/graph_net.json | 6 + .../PaddleNLP/albert-base-v1/input_meta.py | 41 + .../PaddleNLP/albert-base-v1/model.py | 1900 ++++++++++ .../PaddleNLP/albert-base-v1/weight_meta.py | 235 ++ .../PaddleNLP/albert-base-v2/graph_net.json | 6 + .../PaddleNLP/albert-base-v2/input_meta.py | 41 + .../PaddleNLP/albert-base-v2/model.py | 2003 ++++++++++ .../PaddleNLP/albert-base-v2/weight_meta.py | 235 ++ .../albert-chinese-base/graph_net.json | 6 + .../albert-chinese-base/input_meta.py | 19 + .../PaddleNLP/albert-chinese-base/model.py | 1670 +++++++++ .../albert-chinese-base/weight_meta.py | 235 ++ .../albert-chinese-small/graph_net.json | 6 + .../albert-chinese-small/input_meta.py | 19 + .../PaddleNLP/albert-chinese-small/model.py | 914 +++++ .../albert-chinese-small/weight_meta.py | 237 ++ .../albert-chinese-tiny/graph_net.json | 6 + .../albert-chinese-tiny/input_meta.py | 19 + .../PaddleNLP/albert-chinese-tiny/model.py | 662 ++++ .../albert-chinese-tiny/weight_meta.py | 235 ++ .../PaddleNLP/t5-small/graph_net.json | 6 + .../PaddleNLP/t5-small/input_meta.py | 40 + paddle_samples/PaddleNLP/t5-small/model.py | 3317 +++++++++++++++++ .../PaddleNLP/t5-small/weight_meta.py | 1439 +++++++ 25 files changed, 13387 insertions(+) create mode 100644 paddle_samples/PaddleNLP/albert-base-v1/graph_net.json create mode 100644 paddle_samples/PaddleNLP/albert-base-v1/input_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-base-v1/model.py create mode 100644 paddle_samples/PaddleNLP/albert-base-v1/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-base-v2/graph_net.json create mode 100644 paddle_samples/PaddleNLP/albert-base-v2/input_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-base-v2/model.py create mode 100644 paddle_samples/PaddleNLP/albert-base-v2/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-base/graph_net.json create mode 100644 paddle_samples/PaddleNLP/albert-chinese-base/input_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-base/model.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-base/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-small/graph_net.json create mode 100644 paddle_samples/PaddleNLP/albert-chinese-small/input_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-small/model.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-small/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-tiny/graph_net.json create mode 100644 paddle_samples/PaddleNLP/albert-chinese-tiny/input_meta.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-tiny/model.py create mode 100644 paddle_samples/PaddleNLP/albert-chinese-tiny/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/t5-small/graph_net.json create mode 100644 paddle_samples/PaddleNLP/t5-small/input_meta.py create mode 100644 paddle_samples/PaddleNLP/t5-small/model.py create mode 100644 paddle_samples/PaddleNLP/t5-small/weight_meta.py diff --git a/graph_net/test/nlp_model_getter.py b/graph_net/test/nlp_model_getter.py index 5ce710b24..d795f7e30 100644 --- a/graph_net/test/nlp_model_getter.py +++ b/graph_net/test/nlp_model_getter.py @@ -154,3 +154,93 @@ def get_xlnet_model_and_inputs(model_name, text, dtype): enc["attention_mask"] = (input_ids != pad_id).astype("int64") return model, enc + + +def get_t5_model_and_inputs(model_name, text, dtype): + import paddle + from paddlenlp.transformers import T5ForConditionalGeneration, T5Tokenizer + + # 1) 分词器(先建 tokenizer 方便取 pad/eos id) + tokenizer = T5Tokenizer.from_pretrained(model_name) + + # 2) 编码输入(支持单条或批量 text) + enc = tokenizer( + text, + return_tensors="pd", + padding=True, + truncation=True, + max_length=512, + ) + + # 补 attention_mask(pad 处为 0,其他为 1) + if "attention_mask" not in enc: + input_ids = enc["input_ids"] + attn_mask = (input_ids != tokenizer.pad_token_id).astype("int64") + enc["attention_mask"] = attn_mask + + # 构造 decoder_input_ids: + # T5 以 pad_token_id 作为 decoder_start_token_id + batch_size = enc["input_ids"].shape[0] + decoder_input_ids = paddle.full( + shape=[batch_size, 1], + fill_value=tokenizer.pad_token_id, + dtype="int64", + ) + + # 3) 加载模型 + model = T5ForConditionalGeneration.from_pretrained(model_name) + if dtype == "float16": + model = model.astype(paddle.float16) + model.eval() + + # 4) 组装喂给模型的输入 + inputs = { + "input_ids": enc["input_ids"], + "attention_mask": enc["attention_mask"], + "decoder_input_ids": decoder_input_ids, + } + return model, inputs + + +def get_albert_model_and_inputs(model_name, text, dtype): + """ + 加载 ALBERT backbone(AlbertModel)并构造输入。 + - model_name 例如: "albert-base-v2", "albert-xxlarge-v1"(PaddleNLP 内置名称) + - dtype: "float32" 或 "float16" + 返回: (model, inputs_dict) + """ + import paddle + from paddlenlp.transformers import AlbertConfig, AlbertModel, AlbertTokenizer + + # 1) 读取配置(不触发权重下载) + config = AlbertConfig.from_pretrained(model_name) + + # 2) 模型 + # 若你只需要网络结构,可改成: model = AlbertModel(config) + model = AlbertModel(config) + if dtype == "float16": + model = model.astype(paddle.float16) + model.eval() + + # 3) 分词器 + tokenizer = AlbertTokenizer.from_pretrained(model_name) + + # 若无 pad_token,则回退到 unk_token(ALBERT 没有 eos_token,别设 pad=eos) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.unk_token + + # 4) 构造输入(支持 str 或 List[str]) + enc = tokenizer( + text, + return_tensors="pd", + padding=True, + truncation=True, + max_length=512, + ) + + # 显式补 attention_mask(pad 处为 0) + if "attention_mask" not in enc: + input_ids = enc["input_ids"] + enc["attention_mask"] = (input_ids != tokenizer.pad_token_id).astype("int64") + + return model, enc diff --git a/paddle_samples/PaddleNLP/albert-base-v1/graph_net.json b/paddle_samples/PaddleNLP/albert-base-v1/graph_net.json new file mode 100644 index 000000000..e0b36802b --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v1/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "albert-base-v1", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-base-v1/input_meta.py b/paddle_samples/PaddleNLP/albert-base-v1/input_meta.py new file mode 100644 index 000000000..b45834638 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v1/input_meta.py @@ -0,0 +1,41 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 21] + dtype = "int64" + data = [ + 2, + 10975, + 15, + 51, + 204, + 25, + 1909, + 9, + 31, + 589, + 2477, + 88, + 370, + 816, + 2761, + 17, + 66, + 2607, + 18, + 9, + 3, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 21] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 21] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-base-v1/model.py b/paddle_samples/PaddleNLP/albert-base-v1/model.py new file mode 100644 index 000000000..d13518e69 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v1/model.py @@ -0,0 +1,1900 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x21xi64) <- (1x21xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x21xi64) <- (1x1x21xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x21xf32) <- (1x1x1x21xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [21] + + # pd_op.slice: (1x21xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 30000x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x21x128xf32, 1x21xf32, 1x21xf32) <- (1x21x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x21x128xf32, 1x21x128xui8) <- (1x21x128xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_0 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x128xf32, 128x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_20, False, False) + del dropout_0, parameter_20 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 21, 12, 64] + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_3, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_5 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_7, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_8 = paddle._C_ops.add(add_2, dropout_4) + del add_2, dropout_4 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_0 = paddle._C_ops.gelu(add_9, False) + del add_9 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_8 = paddle._C_ops.matmul(gelu_0, parameter_4, False, False) + del gelu_0 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_3) + del matmul_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_11 = paddle._C_ops.add(add_10, layer_norm_3) + del add_10, layer_norm_3 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_13) + del matmul_10 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_4) + del add_12 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_4) + del add_14 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_3 = paddle._C_ops.scale(matmul_12, full_3, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_15 = paddle._C_ops.add(scale_3, scale_1) + del scale_3 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_13 = paddle._C_ops.matmul(dropout_6, transpose_6, False, False) + del dropout_6, transpose_6 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_16, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_17 = paddle._C_ops.add(layer_norm_6, dropout_8) + del dropout_8, layer_norm_6 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_1 = paddle._C_ops.gelu(add_18, False) + del add_18 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_16 = paddle._C_ops.matmul(gelu_1, parameter_4, False, False) + del gelu_1 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_20 = paddle._C_ops.add(add_19, layer_norm_9) + del add_19, layer_norm_9 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_4) + del add_21 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_4) + del add_22 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_4 = paddle._C_ops.scale(matmul_20, full_3, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_24 = paddle._C_ops.add(scale_4, scale_1) + del scale_4 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_21 = paddle._C_ops.matmul(dropout_10, transpose_10, False, False) + del dropout_10, transpose_10 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_25, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_25 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_26 = paddle._C_ops.add(layer_norm_12, dropout_12) + del dropout_12, layer_norm_12 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_2 = paddle._C_ops.gelu(add_27, False) + del add_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_24 = paddle._C_ops.matmul(gelu_2, parameter_4, False, False) + del gelu_2 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_29 = paddle._C_ops.add(add_28, layer_norm_15) + del add_28, layer_norm_15 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_29 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_4) + del add_30 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_4) + del add_31 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_4) + del add_32 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_28, full_3, float("0"), True) + del matmul_28 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_33 = paddle._C_ops.add(scale_5, scale_1) + del scale_5 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_29 = paddle._C_ops.matmul(dropout_14, transpose_14, False, False) + del dropout_14, transpose_14 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del transpose_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del reshape_15 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_34, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_34 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_35 = paddle._C_ops.add(layer_norm_18, dropout_16) + del dropout_16, layer_norm_18 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_3 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_32 = paddle._C_ops.matmul(gelu_3, parameter_4, False, False) + del gelu_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_38 = paddle._C_ops.add(add_37, layer_norm_21) + del add_37, layer_norm_21 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_15) + del matmul_33 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_13) + del matmul_34 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_35, parameter_11) + del matmul_35 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_39, full_int_array_4) + del add_39 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_40, full_int_array_4) + del add_40 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_41, full_int_array_4) + del add_41 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_36 = paddle._C_ops.matmul(transpose_16, transpose_17, False, True) + del transpose_16, transpose_17 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_6 = paddle._C_ops.scale(matmul_36, full_3, float("0"), True) + del matmul_36 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_42 = paddle._C_ops.add(scale_6, scale_1) + del scale_6 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_4 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_37 = paddle._C_ops.matmul(dropout_18, transpose_18, False, False) + del dropout_18, transpose_18 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_5) + del transpose_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_10, False, False) + del reshape_19 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_43 = paddle._C_ops.add(matmul_38, parameter_9) + del matmul_38 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_43, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_44 = paddle._C_ops.add(layer_norm_24, dropout_20) + del dropout_20, layer_norm_24 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_44, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_44 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_27, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_5) + del matmul_39 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_45, False) + del add_45 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_40 = paddle._C_ops.matmul(gelu_4, parameter_4, False, False) + del gelu_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_46 = paddle._C_ops.add(matmul_40, parameter_3) + del matmul_40 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_47 = paddle._C_ops.add(add_46, layer_norm_27) + del add_46, layer_norm_27 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_47, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_47 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_15) + del matmul_41 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_13) + del matmul_42 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_30, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_50 = paddle._C_ops.add(matmul_43, parameter_11) + del matmul_43 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_48, full_int_array_4) + del add_48 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_49, full_int_array_4) + del add_49 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_50, full_int_array_4) + del add_50 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_44 = paddle._C_ops.matmul(transpose_20, transpose_21, False, True) + del transpose_20, transpose_21 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_7 = paddle._C_ops.scale(matmul_44, full_3, float("0"), True) + del matmul_44 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_51 = paddle._C_ops.add(scale_7, scale_1) + del scale_7 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_5 = paddle._C_ops.softmax(add_51, -1) + del add_51 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_45 = paddle._C_ops.matmul(dropout_22, transpose_22, False, False) + del dropout_22, transpose_22 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_5) + del transpose_23 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_10, False, False) + del reshape_23 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_52 = paddle._C_ops.add(matmul_46, parameter_9) + del matmul_46 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_52, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_52 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_53 = paddle._C_ops.add(layer_norm_30, dropout_24) + del dropout_24, layer_norm_30 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_53 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_5) + del matmul_47 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_54, False) + del add_54 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_48 = paddle._C_ops.matmul(gelu_5, parameter_4, False, False) + del gelu_5 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_48, parameter_3) + del matmul_48 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_56 = paddle._C_ops.add(add_55, layer_norm_33) + del add_55, layer_norm_33 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_56 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_36, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_15) + del matmul_49 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_36, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_13) + del matmul_50 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(layer_norm_36, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_59 = paddle._C_ops.add(matmul_51, parameter_11) + del matmul_51 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_57, full_int_array_4) + del add_57 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_58, full_int_array_4) + del add_58 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_59, full_int_array_4) + del add_59 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_52 = paddle._C_ops.matmul(transpose_24, transpose_25, False, True) + del transpose_24, transpose_25 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_8 = paddle._C_ops.scale(matmul_52, full_3, float("0"), True) + del matmul_52 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_60 = paddle._C_ops.add(scale_8, scale_1) + del scale_8 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_6 = paddle._C_ops.softmax(add_60, -1) + del add_60 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_53 = paddle._C_ops.matmul(dropout_26, transpose_26, False, False) + del dropout_26, transpose_26 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_5) + del transpose_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_10, False, False) + del reshape_27 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_54, parameter_9) + del matmul_54 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_61, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_61 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_62 = paddle._C_ops.add(layer_norm_36, dropout_28) + del dropout_28, layer_norm_36 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_62 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_39, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_5) + del matmul_55 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_63, False) + del add_63 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(gelu_6, parameter_4, False, False) + del gelu_6 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_56, parameter_3) + del matmul_56 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_65 = paddle._C_ops.add(add_64, layer_norm_39) + del add_64, layer_norm_39 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_65 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_42, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_15) + del matmul_57 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_42, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_13) + del matmul_58 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(layer_norm_42, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_68 = paddle._C_ops.add(matmul_59, parameter_11) + del matmul_59 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_66, full_int_array_4) + del add_66 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_67, full_int_array_4) + del add_67 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_68, full_int_array_4) + del add_68 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_60 = paddle._C_ops.matmul(transpose_28, transpose_29, False, True) + del transpose_28, transpose_29 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_9 = paddle._C_ops.scale(matmul_60, full_3, float("0"), True) + del matmul_60 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_69 = paddle._C_ops.add(scale_9, scale_1) + del scale_9 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_7 = paddle._C_ops.softmax(add_69, -1) + del add_69 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_61 = paddle._C_ops.matmul(dropout_30, transpose_30, False, False) + del dropout_30, transpose_30 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_61, [0, 2, 1, 3]) + del matmul_61 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_5) + del transpose_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(reshape_31, parameter_10, False, False) + del reshape_31 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_70 = paddle._C_ops.add(matmul_62, parameter_9) + del matmul_62 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_70, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_70 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_71 = paddle._C_ops.add(layer_norm_42, dropout_32) + del dropout_32, layer_norm_42 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_71, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_71 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_63 = paddle._C_ops.matmul(layer_norm_45, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_5) + del matmul_63 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_72, False) + del add_72 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_64 = paddle._C_ops.matmul(gelu_7, parameter_4, False, False) + del gelu_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_73 = paddle._C_ops.add(matmul_64, parameter_3) + del matmul_64 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_74 = paddle._C_ops.add(add_73, layer_norm_45) + del add_73, layer_norm_45 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_74, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_74 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_48, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_15) + del matmul_65 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_48, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_13) + del matmul_66 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_48, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_77 = paddle._C_ops.add(matmul_67, parameter_11) + del matmul_67 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_75, full_int_array_4) + del add_75 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_76, full_int_array_4) + del add_76 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_77, full_int_array_4) + del add_77 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_68 = paddle._C_ops.matmul(transpose_32, transpose_33, False, True) + del transpose_32, transpose_33 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_10 = paddle._C_ops.scale(matmul_68, full_3, float("0"), True) + del matmul_68 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_78 = paddle._C_ops.add(scale_10, scale_1) + del scale_10 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_8 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_69 = paddle._C_ops.matmul(dropout_34, transpose_34, False, False) + del dropout_34, transpose_34 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_5) + del transpose_35 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_70 = paddle._C_ops.matmul(reshape_35, parameter_10, False, False) + del reshape_35 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_79 = paddle._C_ops.add(matmul_70, parameter_9) + del matmul_70 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_79, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_79 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_80 = paddle._C_ops.add(layer_norm_48, dropout_36) + del dropout_36, layer_norm_48 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_80 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_71 = paddle._C_ops.matmul(layer_norm_51, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_5) + del matmul_71 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_72 = paddle._C_ops.matmul(gelu_8, parameter_4, False, False) + del gelu_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_82 = paddle._C_ops.add(matmul_72, parameter_3) + del matmul_72 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_83 = paddle._C_ops.add(add_82, layer_norm_51) + del add_82, layer_norm_51 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_83 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_54, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_73, parameter_15) + del matmul_73 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_54, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_13) + del matmul_74 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_54, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_75, parameter_11) + del matmul_75 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_84, full_int_array_4) + del add_84 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_85, full_int_array_4) + del add_85 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_86, full_int_array_4) + del add_86 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_76 = paddle._C_ops.matmul(transpose_36, transpose_37, False, True) + del transpose_36, transpose_37 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_11 = paddle._C_ops.scale(matmul_76, full_3, float("0"), True) + del matmul_76 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_87 = paddle._C_ops.add(scale_11, scale_1) + del scale_11 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_9 = paddle._C_ops.softmax(add_87, -1) + del add_87 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_77 = paddle._C_ops.matmul(dropout_38, transpose_38, False, False) + del dropout_38, transpose_38 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_77, [0, 2, 1, 3]) + del matmul_77 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_5) + del transpose_39 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(reshape_39, parameter_10, False, False) + del reshape_39 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_88 = paddle._C_ops.add(matmul_78, parameter_9) + del matmul_78 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_88, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_88 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_89 = paddle._C_ops.add(layer_norm_54, dropout_40) + del dropout_40, layer_norm_54 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_89, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_89 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_57, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_5) + del matmul_79 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_90, False) + del add_90 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_80 = paddle._C_ops.matmul(gelu_9, parameter_4, False, False) + del gelu_9 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_91 = paddle._C_ops.add(matmul_80, parameter_3) + del matmul_80 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_92 = paddle._C_ops.add(add_91, layer_norm_57) + del add_91, layer_norm_57 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_92, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_92 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_60, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_15) + del matmul_81 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_60, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_13) + del matmul_82 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_60, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_95 = paddle._C_ops.add(matmul_83, parameter_11) + del matmul_83 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_93, full_int_array_4) + del add_93 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_94, full_int_array_4) + del add_94 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_95, full_int_array_4) + del add_95 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_84 = paddle._C_ops.matmul(transpose_40, transpose_41, False, True) + del transpose_40, transpose_41 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_12 = paddle._C_ops.scale(matmul_84, full_3, float("0"), True) + del matmul_84 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_96 = paddle._C_ops.add(scale_12, scale_1) + del scale_12 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_10 = paddle._C_ops.softmax(add_96, -1) + del add_96 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_85 = paddle._C_ops.matmul(dropout_42, transpose_42, False, False) + del dropout_42, transpose_42 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_85, [0, 2, 1, 3]) + del matmul_85 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_5) + del transpose_43 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_86 = paddle._C_ops.matmul(reshape_43, parameter_10, False, False) + del reshape_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_86, parameter_9) + del matmul_86 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_97, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_97 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_98 = paddle._C_ops.add(layer_norm_60, dropout_44) + del dropout_44, layer_norm_60 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_98 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_87 = paddle._C_ops.matmul(layer_norm_63, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_5) + del matmul_87 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_99, False) + del add_99 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_88 = paddle._C_ops.matmul(gelu_10, parameter_4, False, False) + del gelu_10 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_100 = paddle._C_ops.add(matmul_88, parameter_3) + del matmul_88 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_101 = paddle._C_ops.add(add_100, layer_norm_63) + del add_100, layer_norm_63 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_101 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_66, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_15) + del matmul_89, parameter_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_66, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_13) + del matmul_90, parameter_13 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_66, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_104 = paddle._C_ops.add(matmul_91, parameter_11) + del matmul_91, parameter_11 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_102, full_int_array_4) + del add_102 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_103, full_int_array_4) + del add_103 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_104, full_int_array_4) + del add_104, full_int_array_4 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_92 = paddle._C_ops.matmul(transpose_44, transpose_45, False, True) + del transpose_44, transpose_45 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_13 = paddle._C_ops.scale(matmul_92, full_3, float("0"), True) + del full_3, matmul_92 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_105 = paddle._C_ops.add(scale_13, scale_1) + del scale_1, scale_13 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_11 = paddle._C_ops.softmax(add_105, -1) + del add_105 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_93 = paddle._C_ops.matmul(dropout_46, transpose_46, False, False) + del dropout_46, transpose_46 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_93, [0, 2, 1, 3]) + del matmul_93 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_5) + del full_int_array_5, transpose_47 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_47, parameter_10, False, False) + del parameter_10, reshape_47 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_106 = paddle._C_ops.add(matmul_94, parameter_9) + del matmul_94, parameter_9 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_106, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_106, full_2 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_107 = paddle._C_ops.add(layer_norm_66, dropout_48) + del dropout_48, layer_norm_66 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_107, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_107, parameter_7, parameter_8 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_69, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_5) + del matmul_95, parameter_5 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_108, False) + del add_108 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(gelu_11, parameter_4, False, False) + del gelu_11, parameter_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_109 = paddle._C_ops.add(matmul_96, parameter_3) + del matmul_96, parameter_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_110 = paddle._C_ops.add(add_109, layer_norm_69) + del add_109, layer_norm_69 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_110, parameter_17, parameter_18 + + # pd_op.slice: (1x768xf32) <- (1x21x768xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x768xf32) <- (1x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x768xf32) <- (1x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_97, parameter_1) + del matmul_97, parameter_1 + + # pd_op.tanh: (1x768xf32) <- (1x768xf32) + tanh_0 = paddle._C_ops.tanh(add_111) + del add_111, layer_norm_72 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-base-v1/weight_meta.py b/paddle_samples/PaddleNLP/albert-base-v1/weight_meta.py new file mode 100644 index 000000000..f9edf4996 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v1/weight_meta.py @@ -0,0 +1,235 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950393") + max_val = float("0.0949818") + mean = float("2.1163e-05") + std = float("0.0199833") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0978883") + max_val = float("0.0982025") + mean = float("-9.18199e-06") + std = float("0.0199946") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101063") + max_val = float("0.100538") + mean = float("1.45909e-05") + std = float("0.0200083") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0869559") + max_val = float("0.10256") + mean = float("-1.37491e-05") + std = float("0.0200072") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.095139") + max_val = float("0.0959922") + mean = float("3.0233e-05") + std = float("0.0199653") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0921497") + max_val = float("0.0860873") + mean = float("3.58198e-05") + std = float("0.019985") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0895908") + max_val = float("0.08705") + mean = float("7.12779e-06") + std = float("0.0199925") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 768] + dtype = "float32" + min_val = float("-0.0824699") + max_val = float("0.0883701") + mean = float("5.90966e-05") + std = float("0.0199163") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0438373") + max_val = float("0.0567006") + mean = float("0.00175291") + std = float("0.0182297") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0952002") + max_val = float("0.0822103") + mean = float("0.000103211") + std = float("0.0200516") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [30000, 128] + dtype = "float32" + min_val = float("-0.0970852") + max_val = float("0.110504") + mean = float("5.30125e-06") + std = float("0.0200003") + data = None diff --git a/paddle_samples/PaddleNLP/albert-base-v2/graph_net.json b/paddle_samples/PaddleNLP/albert-base-v2/graph_net.json new file mode 100644 index 000000000..ae04e4634 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v2/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "albert-base-v2", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-base-v2/input_meta.py b/paddle_samples/PaddleNLP/albert-base-v2/input_meta.py new file mode 100644 index 000000000..b45834638 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v2/input_meta.py @@ -0,0 +1,41 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 21] + dtype = "int64" + data = [ + 2, + 10975, + 15, + 51, + 204, + 25, + 1909, + 9, + 31, + 589, + 2477, + 88, + 370, + 816, + 2761, + 17, + 66, + 2607, + 18, + 9, + 3, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 21] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 21] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-base-v2/model.py b/paddle_samples/PaddleNLP/albert-base-v2/model.py new file mode 100644 index 000000000..ec2624730 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v2/model.py @@ -0,0 +1,2003 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x21xi64) <- (1x21xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x21xi64) <- (1x1x21xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x21xf32) <- (1x1x1x21xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [21] + + # pd_op.slice: (1x21xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 30000x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x21x128xf32, 1x21xf32, 1x21xf32) <- (1x21x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x128xf32, 128x768xf32) + matmul_0 = paddle._C_ops.matmul(layer_norm_0, parameter_20, False, False) + del layer_norm_0, parameter_20 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 21, 12, 64] + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_2, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_5 = paddle._C_ops.matmul(softmax_0, transpose_2, False, False) + del softmax_0, transpose_2 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_8 = paddle._C_ops.add(add_2, add_7) + del add_2, add_7 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.5"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_3 = paddle._C_ops.scale(add_9, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_0 = paddle._C_ops.pow(add_9, float("3")) + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.044715"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_4 = paddle._C_ops.scale(pow_0, full_4, float("0"), True) + del pow_0 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_10 = paddle._C_ops.add(add_9, scale_4) + del add_9, scale_4 + + # pd_op.full: (1xf32) <- () + full_5 = paddle._C_ops.full( + [1], float("0.797885"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_5 = paddle._C_ops.scale(add_10, full_5, float("0"), True) + del add_10 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_1 = paddle._C_ops.tanh(scale_5) + del scale_5 + + # pd_op.full: (1xf32) <- () + full_6 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_6 = paddle._C_ops.scale(tanh_1, full_6, float("1"), True) + del tanh_1 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_0 = paddle._C_ops.multiply(scale_3, scale_6) + del scale_3, scale_6 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_8 = paddle._C_ops.matmul(multiply_0, parameter_4, False, False) + del multiply_0 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_11 = paddle._C_ops.add(matmul_8, parameter_3) + del matmul_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_12 = paddle._C_ops.add(add_11, layer_norm_3) + del add_11, layer_norm_3 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_12, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_12 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_13) + del matmul_10 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_14, full_int_array_4) + del add_14 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_15, full_int_array_4) + del add_15 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_7 = paddle._C_ops.scale(matmul_12, full_2, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_16 = paddle._C_ops.add(scale_7, scale_1) + del scale_7 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_1 = paddle._C_ops.softmax(add_16, -1) + del add_16 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_13 = paddle._C_ops.matmul(softmax_1, transpose_6, False, False) + del softmax_1, transpose_6 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_18 = paddle._C_ops.add(layer_norm_6, add_17) + del add_17, layer_norm_6 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_8 = paddle._C_ops.scale(add_19, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_1 = paddle._C_ops.pow(add_19, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_9 = paddle._C_ops.scale(pow_1, full_4, float("0"), True) + del pow_1 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_20 = paddle._C_ops.add(add_19, scale_9) + del add_19, scale_9 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_10 = paddle._C_ops.scale(add_20, full_5, float("0"), True) + del add_20 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_2 = paddle._C_ops.tanh(scale_10) + del scale_10 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_11 = paddle._C_ops.scale(tanh_2, full_6, float("1"), True) + del tanh_2 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_1 = paddle._C_ops.multiply(scale_8, scale_11) + del scale_11, scale_8 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_16 = paddle._C_ops.matmul(multiply_1, parameter_4, False, False) + del multiply_1 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_22 = paddle._C_ops.add(add_21, layer_norm_9) + del add_21, layer_norm_9 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_22, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_22 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_23 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_25 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_24, full_int_array_4) + del add_24 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_25, full_int_array_4) + del add_25 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_12 = paddle._C_ops.scale(matmul_20, full_2, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_26 = paddle._C_ops.add(scale_12, scale_1) + del scale_12 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_2 = paddle._C_ops.softmax(add_26, -1) + del add_26 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_21 = paddle._C_ops.matmul(softmax_2, transpose_10, False, False) + del softmax_2, transpose_10 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_27 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_28 = paddle._C_ops.add(layer_norm_12, add_27) + del add_27, layer_norm_12 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_28 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_29 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_13 = paddle._C_ops.scale(add_29, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_2 = paddle._C_ops.pow(add_29, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_14 = paddle._C_ops.scale(pow_2, full_4, float("0"), True) + del pow_2 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_30 = paddle._C_ops.add(add_29, scale_14) + del add_29, scale_14 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_15 = paddle._C_ops.scale(add_30, full_5, float("0"), True) + del add_30 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_3 = paddle._C_ops.tanh(scale_15) + del scale_15 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_16 = paddle._C_ops.scale(tanh_3, full_6, float("1"), True) + del tanh_3 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_2 = paddle._C_ops.multiply(scale_13, scale_16) + del scale_13, scale_16 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_24 = paddle._C_ops.matmul(multiply_2, parameter_4, False, False) + del multiply_2 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_32 = paddle._C_ops.add(add_31, layer_norm_15) + del add_31, layer_norm_15 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_32, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_32 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_34 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_33, full_int_array_4) + del add_33 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_34, full_int_array_4) + del add_34 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_35, full_int_array_4) + del add_35 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_17 = paddle._C_ops.scale(matmul_28, full_2, float("0"), True) + del matmul_28 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_36 = paddle._C_ops.add(scale_17, scale_1) + del scale_17 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_3 = paddle._C_ops.softmax(add_36, -1) + del add_36 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_29 = paddle._C_ops.matmul(softmax_3, transpose_14, False, False) + del softmax_3, transpose_14 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del transpose_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del reshape_15 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_38 = paddle._C_ops.add(layer_norm_18, add_37) + del add_37, layer_norm_18 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_39 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_18 = paddle._C_ops.scale(add_39, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_3 = paddle._C_ops.pow(add_39, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_19 = paddle._C_ops.scale(pow_3, full_4, float("0"), True) + del pow_3 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_40 = paddle._C_ops.add(add_39, scale_19) + del add_39, scale_19 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_20 = paddle._C_ops.scale(add_40, full_5, float("0"), True) + del add_40 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_4 = paddle._C_ops.tanh(scale_20) + del scale_20 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_21 = paddle._C_ops.scale(tanh_4, full_6, float("1"), True) + del tanh_4 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_3 = paddle._C_ops.multiply(scale_18, scale_21) + del scale_18, scale_21 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_32 = paddle._C_ops.matmul(multiply_3, parameter_4, False, False) + del multiply_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_42 = paddle._C_ops.add(add_41, layer_norm_21) + del add_41, layer_norm_21 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_42 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_43 = paddle._C_ops.add(matmul_33, parameter_15) + del matmul_33 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_13) + del matmul_34 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_45 = paddle._C_ops.add(matmul_35, parameter_11) + del matmul_35 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_43, full_int_array_4) + del add_43 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_44, full_int_array_4) + del add_44 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_45, full_int_array_4) + del add_45 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_36 = paddle._C_ops.matmul(transpose_16, transpose_17, False, True) + del transpose_16, transpose_17 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_22 = paddle._C_ops.scale(matmul_36, full_2, float("0"), True) + del matmul_36 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_46 = paddle._C_ops.add(scale_22, scale_1) + del scale_22 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_4 = paddle._C_ops.softmax(add_46, -1) + del add_46 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_37 = paddle._C_ops.matmul(softmax_4, transpose_18, False, False) + del softmax_4, transpose_18 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_5) + del transpose_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_10, False, False) + del reshape_19 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_38, parameter_9) + del matmul_38 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_48 = paddle._C_ops.add(layer_norm_24, add_47) + del add_47, layer_norm_24 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_48 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_27, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_49 = paddle._C_ops.add(matmul_39, parameter_5) + del matmul_39 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_23 = paddle._C_ops.scale(add_49, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_4 = paddle._C_ops.pow(add_49, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_24 = paddle._C_ops.scale(pow_4, full_4, float("0"), True) + del pow_4 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_50 = paddle._C_ops.add(add_49, scale_24) + del add_49, scale_24 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_25 = paddle._C_ops.scale(add_50, full_5, float("0"), True) + del add_50 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_5 = paddle._C_ops.tanh(scale_25) + del scale_25 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_26 = paddle._C_ops.scale(tanh_5, full_6, float("1"), True) + del tanh_5 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_4 = paddle._C_ops.multiply(scale_23, scale_26) + del scale_23, scale_26 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_40 = paddle._C_ops.matmul(multiply_4, parameter_4, False, False) + del multiply_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_51 = paddle._C_ops.add(matmul_40, parameter_3) + del matmul_40 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_52 = paddle._C_ops.add(add_51, layer_norm_27) + del add_51, layer_norm_27 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_52, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_52 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_53 = paddle._C_ops.add(matmul_41, parameter_15) + del matmul_41 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_54 = paddle._C_ops.add(matmul_42, parameter_13) + del matmul_42 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_30, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_43, parameter_11) + del matmul_43 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_53, full_int_array_4) + del add_53 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_54, full_int_array_4) + del add_54 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_55, full_int_array_4) + del add_55 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_44 = paddle._C_ops.matmul(transpose_20, transpose_21, False, True) + del transpose_20, transpose_21 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_27 = paddle._C_ops.scale(matmul_44, full_2, float("0"), True) + del matmul_44 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_56 = paddle._C_ops.add(scale_27, scale_1) + del scale_27 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_5 = paddle._C_ops.softmax(add_56, -1) + del add_56 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_45 = paddle._C_ops.matmul(softmax_5, transpose_22, False, False) + del softmax_5, transpose_22 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_5) + del transpose_23 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_10, False, False) + del reshape_23 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_46, parameter_9) + del matmul_46 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_58 = paddle._C_ops.add(layer_norm_30, add_57) + del add_57, layer_norm_30 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_58, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_58 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_59 = paddle._C_ops.add(matmul_47, parameter_5) + del matmul_47 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_28 = paddle._C_ops.scale(add_59, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_5 = paddle._C_ops.pow(add_59, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_29 = paddle._C_ops.scale(pow_5, full_4, float("0"), True) + del pow_5 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_60 = paddle._C_ops.add(add_59, scale_29) + del add_59, scale_29 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_30 = paddle._C_ops.scale(add_60, full_5, float("0"), True) + del add_60 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_6 = paddle._C_ops.tanh(scale_30) + del scale_30 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_31 = paddle._C_ops.scale(tanh_6, full_6, float("1"), True) + del tanh_6 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_5 = paddle._C_ops.multiply(scale_28, scale_31) + del scale_28, scale_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_48 = paddle._C_ops.matmul(multiply_5, parameter_4, False, False) + del multiply_5 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_48, parameter_3) + del matmul_48 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_62 = paddle._C_ops.add(add_61, layer_norm_33) + del add_61, layer_norm_33 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_62 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_36, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_63 = paddle._C_ops.add(matmul_49, parameter_15) + del matmul_49 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_36, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_50, parameter_13) + del matmul_50 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(layer_norm_36, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_65 = paddle._C_ops.add(matmul_51, parameter_11) + del matmul_51 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_63, full_int_array_4) + del add_63 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_64, full_int_array_4) + del add_64 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_65, full_int_array_4) + del add_65 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_52 = paddle._C_ops.matmul(transpose_24, transpose_25, False, True) + del transpose_24, transpose_25 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_32 = paddle._C_ops.scale(matmul_52, full_2, float("0"), True) + del matmul_52 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_66 = paddle._C_ops.add(scale_32, scale_1) + del scale_32 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_6 = paddle._C_ops.softmax(add_66, -1) + del add_66 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_53 = paddle._C_ops.matmul(softmax_6, transpose_26, False, False) + del softmax_6, transpose_26 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_5) + del transpose_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_10, False, False) + del reshape_27 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_54, parameter_9) + del matmul_54 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_68 = paddle._C_ops.add(layer_norm_36, add_67) + del add_67, layer_norm_36 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_68, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_68 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_39, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_69 = paddle._C_ops.add(matmul_55, parameter_5) + del matmul_55 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_33 = paddle._C_ops.scale(add_69, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_6 = paddle._C_ops.pow(add_69, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_34 = paddle._C_ops.scale(pow_6, full_4, float("0"), True) + del pow_6 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_70 = paddle._C_ops.add(add_69, scale_34) + del add_69, scale_34 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_35 = paddle._C_ops.scale(add_70, full_5, float("0"), True) + del add_70 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_7 = paddle._C_ops.tanh(scale_35) + del scale_35 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_36 = paddle._C_ops.scale(tanh_7, full_6, float("1"), True) + del tanh_7 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_6 = paddle._C_ops.multiply(scale_33, scale_36) + del scale_33, scale_36 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(multiply_6, parameter_4, False, False) + del multiply_6 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_71 = paddle._C_ops.add(matmul_56, parameter_3) + del matmul_56 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_72 = paddle._C_ops.add(add_71, layer_norm_39) + del add_71, layer_norm_39 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_72, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_72 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_42, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_73 = paddle._C_ops.add(matmul_57, parameter_15) + del matmul_57 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_42, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_74 = paddle._C_ops.add(matmul_58, parameter_13) + del matmul_58 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(layer_norm_42, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_59, parameter_11) + del matmul_59 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_73, full_int_array_4) + del add_73 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_74, full_int_array_4) + del add_74 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_75, full_int_array_4) + del add_75 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_60 = paddle._C_ops.matmul(transpose_28, transpose_29, False, True) + del transpose_28, transpose_29 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_37 = paddle._C_ops.scale(matmul_60, full_2, float("0"), True) + del matmul_60 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_76 = paddle._C_ops.add(scale_37, scale_1) + del scale_37 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_7 = paddle._C_ops.softmax(add_76, -1) + del add_76 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_61 = paddle._C_ops.matmul(softmax_7, transpose_30, False, False) + del softmax_7, transpose_30 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_61, [0, 2, 1, 3]) + del matmul_61 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_5) + del transpose_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(reshape_31, parameter_10, False, False) + del reshape_31 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_77 = paddle._C_ops.add(matmul_62, parameter_9) + del matmul_62 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_78 = paddle._C_ops.add(layer_norm_42, add_77) + del add_77, layer_norm_42 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_78, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_78 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_63 = paddle._C_ops.matmul(layer_norm_45, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_79 = paddle._C_ops.add(matmul_63, parameter_5) + del matmul_63 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_38 = paddle._C_ops.scale(add_79, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_7 = paddle._C_ops.pow(add_79, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_39 = paddle._C_ops.scale(pow_7, full_4, float("0"), True) + del pow_7 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_80 = paddle._C_ops.add(add_79, scale_39) + del add_79, scale_39 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_40 = paddle._C_ops.scale(add_80, full_5, float("0"), True) + del add_80 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_8 = paddle._C_ops.tanh(scale_40) + del scale_40 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_41 = paddle._C_ops.scale(tanh_8, full_6, float("1"), True) + del tanh_8 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_7 = paddle._C_ops.multiply(scale_38, scale_41) + del scale_38, scale_41 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_64 = paddle._C_ops.matmul(multiply_7, parameter_4, False, False) + del multiply_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_81 = paddle._C_ops.add(matmul_64, parameter_3) + del matmul_64 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_82 = paddle._C_ops.add(add_81, layer_norm_45) + del add_81, layer_norm_45 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_82, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_82 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_48, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_65, parameter_15) + del matmul_65 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_48, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_66, parameter_13) + del matmul_66 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_48, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_67, parameter_11) + del matmul_67 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_83, full_int_array_4) + del add_83 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_84, full_int_array_4) + del add_84 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_85, full_int_array_4) + del add_85 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_68 = paddle._C_ops.matmul(transpose_32, transpose_33, False, True) + del transpose_32, transpose_33 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_42 = paddle._C_ops.scale(matmul_68, full_2, float("0"), True) + del matmul_68 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_86 = paddle._C_ops.add(scale_42, scale_1) + del scale_42 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_8 = paddle._C_ops.softmax(add_86, -1) + del add_86 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_69 = paddle._C_ops.matmul(softmax_8, transpose_34, False, False) + del softmax_8, transpose_34 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_5) + del transpose_35 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_70 = paddle._C_ops.matmul(reshape_35, parameter_10, False, False) + del reshape_35 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_70, parameter_9) + del matmul_70 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_88 = paddle._C_ops.add(layer_norm_48, add_87) + del add_87, layer_norm_48 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_88 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_71 = paddle._C_ops.matmul(layer_norm_51, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_89 = paddle._C_ops.add(matmul_71, parameter_5) + del matmul_71 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_43 = paddle._C_ops.scale(add_89, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_8 = paddle._C_ops.pow(add_89, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_44 = paddle._C_ops.scale(pow_8, full_4, float("0"), True) + del pow_8 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_90 = paddle._C_ops.add(add_89, scale_44) + del add_89, scale_44 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_45 = paddle._C_ops.scale(add_90, full_5, float("0"), True) + del add_90 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_9 = paddle._C_ops.tanh(scale_45) + del scale_45 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_46 = paddle._C_ops.scale(tanh_9, full_6, float("1"), True) + del tanh_9 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_8 = paddle._C_ops.multiply(scale_43, scale_46) + del scale_43, scale_46 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_72 = paddle._C_ops.matmul(multiply_8, parameter_4, False, False) + del multiply_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_91 = paddle._C_ops.add(matmul_72, parameter_3) + del matmul_72 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_92 = paddle._C_ops.add(add_91, layer_norm_51) + del add_91, layer_norm_51 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_92, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_92 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_54, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_73, parameter_15) + del matmul_73 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_54, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_74, parameter_13) + del matmul_74 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_54, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_95 = paddle._C_ops.add(matmul_75, parameter_11) + del matmul_75 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_93, full_int_array_4) + del add_93 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_94, full_int_array_4) + del add_94 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_95, full_int_array_4) + del add_95 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_76 = paddle._C_ops.matmul(transpose_36, transpose_37, False, True) + del transpose_36, transpose_37 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_47 = paddle._C_ops.scale(matmul_76, full_2, float("0"), True) + del matmul_76 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_96 = paddle._C_ops.add(scale_47, scale_1) + del scale_47 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_9 = paddle._C_ops.softmax(add_96, -1) + del add_96 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_77 = paddle._C_ops.matmul(softmax_9, transpose_38, False, False) + del softmax_9, transpose_38 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_77, [0, 2, 1, 3]) + del matmul_77 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_5) + del transpose_39 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(reshape_39, parameter_10, False, False) + del reshape_39 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_78, parameter_9) + del matmul_78 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_98 = paddle._C_ops.add(layer_norm_54, add_97) + del add_97, layer_norm_54 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_98 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_57, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_99 = paddle._C_ops.add(matmul_79, parameter_5) + del matmul_79 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_48 = paddle._C_ops.scale(add_99, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_9 = paddle._C_ops.pow(add_99, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_49 = paddle._C_ops.scale(pow_9, full_4, float("0"), True) + del pow_9 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_100 = paddle._C_ops.add(add_99, scale_49) + del add_99, scale_49 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_50 = paddle._C_ops.scale(add_100, full_5, float("0"), True) + del add_100 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_10 = paddle._C_ops.tanh(scale_50) + del scale_50 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_51 = paddle._C_ops.scale(tanh_10, full_6, float("1"), True) + del tanh_10 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_9 = paddle._C_ops.multiply(scale_48, scale_51) + del scale_48, scale_51 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_80 = paddle._C_ops.matmul(multiply_9, parameter_4, False, False) + del multiply_9 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_101 = paddle._C_ops.add(matmul_80, parameter_3) + del matmul_80 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_102 = paddle._C_ops.add(add_101, layer_norm_57) + del add_101, layer_norm_57 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_102, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_102 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_60, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_81, parameter_15) + del matmul_81 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_60, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_104 = paddle._C_ops.add(matmul_82, parameter_13) + del matmul_82 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_60, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_83, parameter_11) + del matmul_83 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_103, full_int_array_4) + del add_103 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_104, full_int_array_4) + del add_104 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_105, full_int_array_4) + del add_105 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_84 = paddle._C_ops.matmul(transpose_40, transpose_41, False, True) + del transpose_40, transpose_41 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_52 = paddle._C_ops.scale(matmul_84, full_2, float("0"), True) + del matmul_84 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_106 = paddle._C_ops.add(scale_52, scale_1) + del scale_52 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_10 = paddle._C_ops.softmax(add_106, -1) + del add_106 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_85 = paddle._C_ops.matmul(softmax_10, transpose_42, False, False) + del softmax_10, transpose_42 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_85, [0, 2, 1, 3]) + del matmul_85 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_5) + del transpose_43 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_86 = paddle._C_ops.matmul(reshape_43, parameter_10, False, False) + del reshape_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_107 = paddle._C_ops.add(matmul_86, parameter_9) + del matmul_86 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_108 = paddle._C_ops.add(layer_norm_60, add_107) + del add_107, layer_norm_60 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_108, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_108 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_87 = paddle._C_ops.matmul(layer_norm_63, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_109 = paddle._C_ops.add(matmul_87, parameter_5) + del matmul_87 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_53 = paddle._C_ops.scale(add_109, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_10 = paddle._C_ops.pow(add_109, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_54 = paddle._C_ops.scale(pow_10, full_4, float("0"), True) + del pow_10 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_110 = paddle._C_ops.add(add_109, scale_54) + del add_109, scale_54 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_55 = paddle._C_ops.scale(add_110, full_5, float("0"), True) + del add_110 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_11 = paddle._C_ops.tanh(scale_55) + del scale_55 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_56 = paddle._C_ops.scale(tanh_11, full_6, float("1"), True) + del tanh_11 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_10 = paddle._C_ops.multiply(scale_53, scale_56) + del scale_53, scale_56 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_88 = paddle._C_ops.matmul(multiply_10, parameter_4, False, False) + del multiply_10 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_88, parameter_3) + del matmul_88 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_112 = paddle._C_ops.add(add_111, layer_norm_63) + del add_111, layer_norm_63 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_112 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_66, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_113 = paddle._C_ops.add(matmul_89, parameter_15) + del matmul_89, parameter_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_66, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_114 = paddle._C_ops.add(matmul_90, parameter_13) + del matmul_90, parameter_13 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_66, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_115 = paddle._C_ops.add(matmul_91, parameter_11) + del matmul_91, parameter_11 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_113, full_int_array_4) + del add_113 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_114, full_int_array_4) + del add_114 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_115, full_int_array_4) + del add_115, full_int_array_4 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_92 = paddle._C_ops.matmul(transpose_44, transpose_45, False, True) + del transpose_44, transpose_45 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_57 = paddle._C_ops.scale(matmul_92, full_2, float("0"), True) + del full_2, matmul_92 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_116 = paddle._C_ops.add(scale_57, scale_1) + del scale_1, scale_57 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_11 = paddle._C_ops.softmax(add_116, -1) + del add_116 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_93 = paddle._C_ops.matmul(softmax_11, transpose_46, False, False) + del softmax_11, transpose_46 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_93, [0, 2, 1, 3]) + del matmul_93 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_5) + del full_int_array_5, transpose_47 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_47, parameter_10, False, False) + del parameter_10, reshape_47 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_117 = paddle._C_ops.add(matmul_94, parameter_9) + del matmul_94, parameter_9 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_118 = paddle._C_ops.add(layer_norm_66, add_117) + del add_117, layer_norm_66 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_118, parameter_7, parameter_8 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_69, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_119 = paddle._C_ops.add(matmul_95, parameter_5) + del matmul_95, parameter_5 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_58 = paddle._C_ops.scale(add_119, full_3, float("0"), True) + del full_3 + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_11 = paddle._C_ops.pow(add_119, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_59 = paddle._C_ops.scale(pow_11, full_4, float("0"), True) + del full_4, pow_11 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_120 = paddle._C_ops.add(add_119, scale_59) + del add_119, scale_59 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_60 = paddle._C_ops.scale(add_120, full_5, float("0"), True) + del add_120, full_5 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_12 = paddle._C_ops.tanh(scale_60) + del scale_60 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_61 = paddle._C_ops.scale(tanh_12, full_6, float("1"), True) + del full_6, tanh_12 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_11 = paddle._C_ops.multiply(scale_58, scale_61) + del scale_58, scale_61 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(multiply_11, parameter_4, False, False) + del multiply_11, parameter_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_121 = paddle._C_ops.add(matmul_96, parameter_3) + del matmul_96, parameter_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_122 = paddle._C_ops.add(add_121, layer_norm_69) + del add_121, layer_norm_69 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_122, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_122, parameter_17, parameter_18 + + # pd_op.slice: (1x768xf32) <- (1x21x768xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x768xf32) <- (1x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x768xf32) <- (1x768xf32, 768xf32) + add_123 = paddle._C_ops.add(matmul_97, parameter_1) + del matmul_97, parameter_1 + + # pd_op.tanh: (1x768xf32) <- (1x768xf32) + tanh_0 = paddle._C_ops.tanh(add_123) + del add_123, layer_norm_72 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-base-v2/weight_meta.py b/paddle_samples/PaddleNLP/albert-base-v2/weight_meta.py new file mode 100644 index 000000000..7588cbb6e --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v2/weight_meta.py @@ -0,0 +1,235 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0887999") + max_val = float("0.107858") + mean = float("9.86864e-06") + std = float("0.0199999") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.101961") + max_val = float("0.107306") + mean = float("-1.01831e-05") + std = float("0.0199888") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0937552") + max_val = float("0.100479") + mean = float("4.65614e-06") + std = float("0.0200006") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0929592") + max_val = float("0.0998105") + mean = float("4.01795e-05") + std = float("0.0200188") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0910254") + max_val = float("0.0940073") + mean = float("1.1987e-05") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0958641") + max_val = float("0.0952735") + mean = float("-4.36463e-05") + std = float("0.0199898") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0925532") + max_val = float("0.0965498") + mean = float("-4.51693e-05") + std = float("0.0200245") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 768] + dtype = "float32" + min_val = float("-0.0868425") + max_val = float("0.0925445") + mean = float("-0.000119993") + std = float("0.0200474") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0554865") + max_val = float("0.0556627") + mean = float("-0.00351806") + std = float("0.0195563") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0802345") + max_val = float("0.0806108") + mean = float("4.65631e-05") + std = float("0.0200016") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [30000, 128] + dtype = "float32" + min_val = float("-0.103231") + max_val = float("0.10054") + mean = float("-1.42952e-05") + std = float("0.0199984") + data = None diff --git a/paddle_samples/PaddleNLP/albert-chinese-base/graph_net.json b/paddle_samples/PaddleNLP/albert-chinese-base/graph_net.json new file mode 100644 index 000000000..16cf1535c --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-base/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "albert-chinese-base", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-chinese-base/input_meta.py b/paddle_samples/PaddleNLP/albert-chinese-base/input_meta.py new file mode 100644 index 000000000..3708564f7 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-base/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3614, 6816, 886, 4500, 4636, 2428, 7607, 3444, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-chinese-base/model.py b/paddle_samples/PaddleNLP/albert-chinese-base/model.py new file mode 100644 index 000000000..51da31093 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-base/model.py @@ -0,0 +1,1670 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x11xi64) <- (1x11xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x11xi64) <- (1x1x11xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x11xf32) <- (1x1x1x11xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [11] + + # pd_op.slice: (1x11xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 21128x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x11x128xf32, 1x11xf32, 1x11xf32) <- (1x11x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x128xf32, 128x768xf32) + matmul_0 = paddle._C_ops.matmul(layer_norm_0, parameter_20, False, False) + del layer_norm_0, parameter_20 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 11, 12, 64] + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_2, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_5 = paddle._C_ops.matmul(softmax_0, transpose_2, False, False) + del softmax_0, transpose_2 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_8 = paddle._C_ops.add(add_2, add_7) + del add_2, add_7 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_0 = paddle._C_ops.relu(add_9) + del add_9 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_8 = paddle._C_ops.matmul(relu_0, parameter_4, False, False) + del relu_0 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_3) + del matmul_8 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_11 = paddle._C_ops.add(add_10, layer_norm_3) + del add_10, layer_norm_3 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_13) + del matmul_10 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_4) + del add_12 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_4) + del add_14 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_3 = paddle._C_ops.scale(matmul_12, full_2, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_15 = paddle._C_ops.add(scale_3, scale_1) + del scale_3 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_13 = paddle._C_ops.matmul(softmax_1, transpose_6, False, False) + del softmax_1, transpose_6 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_17 = paddle._C_ops.add(layer_norm_6, add_16) + del add_16, layer_norm_6 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_1 = paddle._C_ops.relu(add_18) + del add_18 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_16 = paddle._C_ops.matmul(relu_1, parameter_4, False, False) + del relu_1 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_20 = paddle._C_ops.add(add_19, layer_norm_9) + del add_19, layer_norm_9 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_4) + del add_21 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_4) + del add_22 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_4 = paddle._C_ops.scale(matmul_20, full_2, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_24 = paddle._C_ops.add(scale_4, scale_1) + del scale_4 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_21 = paddle._C_ops.matmul(softmax_2, transpose_10, False, False) + del softmax_2, transpose_10 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_26 = paddle._C_ops.add(layer_norm_12, add_25) + del add_25, layer_norm_12 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_2 = paddle._C_ops.relu(add_27) + del add_27 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_24 = paddle._C_ops.matmul(relu_2, parameter_4, False, False) + del relu_2 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_29 = paddle._C_ops.add(add_28, layer_norm_15) + del add_28, layer_norm_15 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_29 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_4) + del add_30 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_4) + del add_31 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_4) + del add_32 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_28, full_2, float("0"), True) + del matmul_28 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_33 = paddle._C_ops.add(scale_5, scale_1) + del scale_5 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_29 = paddle._C_ops.matmul(softmax_3, transpose_14, False, False) + del softmax_3, transpose_14 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del transpose_15 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del reshape_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_35 = paddle._C_ops.add(layer_norm_18, add_34) + del add_34, layer_norm_18 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_3 = paddle._C_ops.relu(add_36) + del add_36 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_32 = paddle._C_ops.matmul(relu_3, parameter_4, False, False) + del relu_3 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_38 = paddle._C_ops.add(add_37, layer_norm_21) + del add_37, layer_norm_21 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_15) + del matmul_33 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_13) + del matmul_34 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_35, parameter_11) + del matmul_35 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_39, full_int_array_4) + del add_39 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_40, full_int_array_4) + del add_40 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_41, full_int_array_4) + del add_41 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_36 = paddle._C_ops.matmul(transpose_16, transpose_17, False, True) + del transpose_16, transpose_17 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_6 = paddle._C_ops.scale(matmul_36, full_2, float("0"), True) + del matmul_36 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_42 = paddle._C_ops.add(scale_6, scale_1) + del scale_6 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_37 = paddle._C_ops.matmul(softmax_4, transpose_18, False, False) + del softmax_4, transpose_18 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_5) + del transpose_19 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_10, False, False) + del reshape_19 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_43 = paddle._C_ops.add(matmul_38, parameter_9) + del matmul_38 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_44 = paddle._C_ops.add(layer_norm_24, add_43) + del add_43, layer_norm_24 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_44, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_44 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_27, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_5) + del matmul_39 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_4 = paddle._C_ops.relu(add_45) + del add_45 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_40 = paddle._C_ops.matmul(relu_4, parameter_4, False, False) + del relu_4 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_46 = paddle._C_ops.add(matmul_40, parameter_3) + del matmul_40 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_47 = paddle._C_ops.add(add_46, layer_norm_27) + del add_46, layer_norm_27 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_47, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_47 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_15) + del matmul_41 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_13) + del matmul_42 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_30, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_50 = paddle._C_ops.add(matmul_43, parameter_11) + del matmul_43 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_48, full_int_array_4) + del add_48 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_49, full_int_array_4) + del add_49 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_50, full_int_array_4) + del add_50 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_44 = paddle._C_ops.matmul(transpose_20, transpose_21, False, True) + del transpose_20, transpose_21 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_7 = paddle._C_ops.scale(matmul_44, full_2, float("0"), True) + del matmul_44 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_51 = paddle._C_ops.add(scale_7, scale_1) + del scale_7 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_51, -1) + del add_51 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_45 = paddle._C_ops.matmul(softmax_5, transpose_22, False, False) + del softmax_5, transpose_22 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_5) + del transpose_23 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_10, False, False) + del reshape_23 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_52 = paddle._C_ops.add(matmul_46, parameter_9) + del matmul_46 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_53 = paddle._C_ops.add(layer_norm_30, add_52) + del add_52, layer_norm_30 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_53 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_5) + del matmul_47 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_5 = paddle._C_ops.relu(add_54) + del add_54 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_48 = paddle._C_ops.matmul(relu_5, parameter_4, False, False) + del relu_5 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_48, parameter_3) + del matmul_48 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_56 = paddle._C_ops.add(add_55, layer_norm_33) + del add_55, layer_norm_33 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_56 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_36, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_15) + del matmul_49 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_36, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_13) + del matmul_50 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(layer_norm_36, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_59 = paddle._C_ops.add(matmul_51, parameter_11) + del matmul_51 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_57, full_int_array_4) + del add_57 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_58, full_int_array_4) + del add_58 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_59, full_int_array_4) + del add_59 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_52 = paddle._C_ops.matmul(transpose_24, transpose_25, False, True) + del transpose_24, transpose_25 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_8 = paddle._C_ops.scale(matmul_52, full_2, float("0"), True) + del matmul_52 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_60 = paddle._C_ops.add(scale_8, scale_1) + del scale_8 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_6 = paddle._C_ops.softmax(add_60, -1) + del add_60 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_53 = paddle._C_ops.matmul(softmax_6, transpose_26, False, False) + del softmax_6, transpose_26 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_5) + del transpose_27 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_10, False, False) + del reshape_27 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_54, parameter_9) + del matmul_54 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_62 = paddle._C_ops.add(layer_norm_36, add_61) + del add_61, layer_norm_36 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_62 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_39, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_5) + del matmul_55 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_6 = paddle._C_ops.relu(add_63) + del add_63 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(relu_6, parameter_4, False, False) + del relu_6 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_56, parameter_3) + del matmul_56 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_65 = paddle._C_ops.add(add_64, layer_norm_39) + del add_64, layer_norm_39 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_65 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_42, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_15) + del matmul_57 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_42, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_13) + del matmul_58 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(layer_norm_42, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_68 = paddle._C_ops.add(matmul_59, parameter_11) + del matmul_59 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_66, full_int_array_4) + del add_66 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_67, full_int_array_4) + del add_67 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_68, full_int_array_4) + del add_68 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_60 = paddle._C_ops.matmul(transpose_28, transpose_29, False, True) + del transpose_28, transpose_29 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_9 = paddle._C_ops.scale(matmul_60, full_2, float("0"), True) + del matmul_60 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_69 = paddle._C_ops.add(scale_9, scale_1) + del scale_9 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_7 = paddle._C_ops.softmax(add_69, -1) + del add_69 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_61 = paddle._C_ops.matmul(softmax_7, transpose_30, False, False) + del softmax_7, transpose_30 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_61, [0, 2, 1, 3]) + del matmul_61 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_5) + del transpose_31 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(reshape_31, parameter_10, False, False) + del reshape_31 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_70 = paddle._C_ops.add(matmul_62, parameter_9) + del matmul_62 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_71 = paddle._C_ops.add(layer_norm_42, add_70) + del add_70, layer_norm_42 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_71, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_71 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_63 = paddle._C_ops.matmul(layer_norm_45, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_5) + del matmul_63 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_7 = paddle._C_ops.relu(add_72) + del add_72 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_64 = paddle._C_ops.matmul(relu_7, parameter_4, False, False) + del relu_7 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_73 = paddle._C_ops.add(matmul_64, parameter_3) + del matmul_64 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_74 = paddle._C_ops.add(add_73, layer_norm_45) + del add_73, layer_norm_45 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_74, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_74 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_48, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_15) + del matmul_65 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_48, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_13) + del matmul_66 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_48, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_77 = paddle._C_ops.add(matmul_67, parameter_11) + del matmul_67 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_75, full_int_array_4) + del add_75 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_76, full_int_array_4) + del add_76 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_77, full_int_array_4) + del add_77 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_68 = paddle._C_ops.matmul(transpose_32, transpose_33, False, True) + del transpose_32, transpose_33 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_10 = paddle._C_ops.scale(matmul_68, full_2, float("0"), True) + del matmul_68 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_78 = paddle._C_ops.add(scale_10, scale_1) + del scale_10 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_8 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_69 = paddle._C_ops.matmul(softmax_8, transpose_34, False, False) + del softmax_8, transpose_34 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_5) + del transpose_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_70 = paddle._C_ops.matmul(reshape_35, parameter_10, False, False) + del reshape_35 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_79 = paddle._C_ops.add(matmul_70, parameter_9) + del matmul_70 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_80 = paddle._C_ops.add(layer_norm_48, add_79) + del add_79, layer_norm_48 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_80 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_71 = paddle._C_ops.matmul(layer_norm_51, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_5) + del matmul_71 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_8 = paddle._C_ops.relu(add_81) + del add_81 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_72 = paddle._C_ops.matmul(relu_8, parameter_4, False, False) + del relu_8 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_82 = paddle._C_ops.add(matmul_72, parameter_3) + del matmul_72 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_83 = paddle._C_ops.add(add_82, layer_norm_51) + del add_82, layer_norm_51 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_83 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_54, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_73, parameter_15) + del matmul_73 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_54, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_13) + del matmul_74 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_54, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_75, parameter_11) + del matmul_75 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_84, full_int_array_4) + del add_84 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_85, full_int_array_4) + del add_85 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_86, full_int_array_4) + del add_86 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_76 = paddle._C_ops.matmul(transpose_36, transpose_37, False, True) + del transpose_36, transpose_37 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_11 = paddle._C_ops.scale(matmul_76, full_2, float("0"), True) + del matmul_76 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_87 = paddle._C_ops.add(scale_11, scale_1) + del scale_11 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_9 = paddle._C_ops.softmax(add_87, -1) + del add_87 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_77 = paddle._C_ops.matmul(softmax_9, transpose_38, False, False) + del softmax_9, transpose_38 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_77, [0, 2, 1, 3]) + del matmul_77 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_5) + del transpose_39 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(reshape_39, parameter_10, False, False) + del reshape_39 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_88 = paddle._C_ops.add(matmul_78, parameter_9) + del matmul_78 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_89 = paddle._C_ops.add(layer_norm_54, add_88) + del add_88, layer_norm_54 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_89, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_89 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_57, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_5) + del matmul_79 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_9 = paddle._C_ops.relu(add_90) + del add_90 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_80 = paddle._C_ops.matmul(relu_9, parameter_4, False, False) + del relu_9 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_91 = paddle._C_ops.add(matmul_80, parameter_3) + del matmul_80 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_92 = paddle._C_ops.add(add_91, layer_norm_57) + del add_91, layer_norm_57 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_92, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_92 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_60, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_15) + del matmul_81 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_60, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_13) + del matmul_82 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_60, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_95 = paddle._C_ops.add(matmul_83, parameter_11) + del matmul_83 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_93, full_int_array_4) + del add_93 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_94, full_int_array_4) + del add_94 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_95, full_int_array_4) + del add_95 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_84 = paddle._C_ops.matmul(transpose_40, transpose_41, False, True) + del transpose_40, transpose_41 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_12 = paddle._C_ops.scale(matmul_84, full_2, float("0"), True) + del matmul_84 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_96 = paddle._C_ops.add(scale_12, scale_1) + del scale_12 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_10 = paddle._C_ops.softmax(add_96, -1) + del add_96 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_85 = paddle._C_ops.matmul(softmax_10, transpose_42, False, False) + del softmax_10, transpose_42 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_85, [0, 2, 1, 3]) + del matmul_85 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_5) + del transpose_43 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_86 = paddle._C_ops.matmul(reshape_43, parameter_10, False, False) + del reshape_43 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_86, parameter_9) + del matmul_86 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_98 = paddle._C_ops.add(layer_norm_60, add_97) + del add_97, layer_norm_60 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_98 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_87 = paddle._C_ops.matmul(layer_norm_63, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_5) + del matmul_87 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_10 = paddle._C_ops.relu(add_99) + del add_99 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_88 = paddle._C_ops.matmul(relu_10, parameter_4, False, False) + del relu_10 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_100 = paddle._C_ops.add(matmul_88, parameter_3) + del matmul_88 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_101 = paddle._C_ops.add(add_100, layer_norm_63) + del add_100, layer_norm_63 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_101 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_66, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_15) + del matmul_89, parameter_15 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_66, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_13) + del matmul_90, parameter_13 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_66, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_104 = paddle._C_ops.add(matmul_91, parameter_11) + del matmul_91, parameter_11 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_102, full_int_array_4) + del add_102 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_103, full_int_array_4) + del add_103 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_104, full_int_array_4) + del add_104, full_int_array_4 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_92 = paddle._C_ops.matmul(transpose_44, transpose_45, False, True) + del transpose_44, transpose_45 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_13 = paddle._C_ops.scale(matmul_92, full_2, float("0"), True) + del full_2, matmul_92 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_105 = paddle._C_ops.add(scale_13, scale_1) + del scale_1, scale_13 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_11 = paddle._C_ops.softmax(add_105, -1) + del add_105 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_93 = paddle._C_ops.matmul(softmax_11, transpose_46, False, False) + del softmax_11, transpose_46 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_93, [0, 2, 1, 3]) + del matmul_93 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_5) + del full_int_array_5, transpose_47 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_47, parameter_10, False, False) + del parameter_10, reshape_47 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_106 = paddle._C_ops.add(matmul_94, parameter_9) + del matmul_94, parameter_9 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_107 = paddle._C_ops.add(layer_norm_66, add_106) + del add_106, layer_norm_66 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_107, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_107, parameter_7, parameter_8 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_69, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_5) + del matmul_95, parameter_5 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_11 = paddle._C_ops.relu(add_108) + del add_108 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(relu_11, parameter_4, False, False) + del parameter_4, relu_11 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_109 = paddle._C_ops.add(matmul_96, parameter_3) + del matmul_96, parameter_3 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_110 = paddle._C_ops.add(add_109, layer_norm_69) + del add_109, layer_norm_69 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_110, parameter_17, parameter_18 + + # pd_op.slice: (1x768xf32) <- (1x11x768xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x768xf32) <- (1x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x768xf32) <- (1x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_97, parameter_1) + del matmul_97, parameter_1 + + # pd_op.tanh: (1x768xf32) <- (1x768xf32) + tanh_0 = paddle._C_ops.tanh(add_111) + del add_111, layer_norm_72 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-chinese-base/weight_meta.py b/paddle_samples/PaddleNLP/albert-chinese-base/weight_meta.py new file mode 100644 index 000000000..d9bd49e44 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-base/weight_meta.py @@ -0,0 +1,235 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0892679") + max_val = float("0.0906134") + mean = float("-3.00388e-06") + std = float("0.0199841") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.101152") + max_val = float("0.0999676") + mean = float("1.01817e-05") + std = float("0.019991") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0942874") + max_val = float("0.0995946") + mean = float("2.76684e-07") + std = float("0.0199973") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0992178") + max_val = float("0.0932706") + mean = float("2.48208e-05") + std = float("0.0200258") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0932884") + max_val = float("0.0917139") + mean = float("1.4562e-05") + std = float("0.0200167") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0930002") + max_val = float("0.0961642") + mean = float("-5.85622e-05") + std = float("0.019987") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916871") + max_val = float("0.089794") + mean = float("-3.99677e-05") + std = float("0.0200198") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 768] + dtype = "float32" + min_val = float("-0.0860205") + max_val = float("0.0973591") + mean = float("2.12175e-05") + std = float("0.0200251") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0590662") + max_val = float("0.0505173") + mean = float("-0.000780354") + std = float("0.0203115") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0905854") + max_val = float("0.0949802") + mean = float("1.62754e-05") + std = float("0.019975") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [21128, 128] + dtype = "float32" + min_val = float("-0.0995291") + max_val = float("0.0971798") + mean = float("-5.40105e-06") + std = float("0.0200164") + data = None diff --git a/paddle_samples/PaddleNLP/albert-chinese-small/graph_net.json b/paddle_samples/PaddleNLP/albert-chinese-small/graph_net.json new file mode 100644 index 000000000..aac3cc2f4 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-small/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "albert-chinese-small", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-chinese-small/input_meta.py b/paddle_samples/PaddleNLP/albert-chinese-small/input_meta.py new file mode 100644 index 000000000..3708564f7 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-small/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3614, 6816, 886, 4500, 4636, 2428, 7607, 3444, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-chinese-small/model.py b/paddle_samples/PaddleNLP/albert-chinese-small/model.py new file mode 100644 index 000000000..aac4bc18d --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-small/model.py @@ -0,0 +1,914 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x11xi64) <- (1x11xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x11xi64) <- (1x1x11xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x11xf32) <- (1x1x1x11xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [11] + + # pd_op.slice: (1x11xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 21128x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x11x128xf32, 1x11xf32, 1x11xf32) <- (1x11x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x128xf32, 128x384xf32) + matmul_0 = paddle._C_ops.matmul(layer_norm_0, parameter_20, False, False) + del layer_norm_0, parameter_20 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 11, 12, 32] + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_2, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_5 = paddle._C_ops.matmul(softmax_0, transpose_2, False, False) + del softmax_0, transpose_2 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_8 = paddle._C_ops.add(add_2, add_7) + del add_2, add_7 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_0 = paddle._C_ops.gelu(add_9, False) + del add_9 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_8 = paddle._C_ops.matmul(gelu_0, parameter_4, False, False) + del gelu_0 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_3) + del matmul_8 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_11 = paddle._C_ops.add(add_10, layer_norm_3) + del add_10, layer_norm_3 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_13) + del matmul_10 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_4) + del add_12 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_4) + del add_14 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_3 = paddle._C_ops.scale(matmul_12, full_2, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_15 = paddle._C_ops.add(scale_3, scale_1) + del scale_3 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_13 = paddle._C_ops.matmul(softmax_1, transpose_6, False, False) + del softmax_1, transpose_6 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_17 = paddle._C_ops.add(layer_norm_6, add_16) + del add_16, layer_norm_6 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_1 = paddle._C_ops.gelu(add_18, False) + del add_18 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_16 = paddle._C_ops.matmul(gelu_1, parameter_4, False, False) + del gelu_1 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_20 = paddle._C_ops.add(add_19, layer_norm_9) + del add_19, layer_norm_9 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_4) + del add_21 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_4) + del add_22 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_4 = paddle._C_ops.scale(matmul_20, full_2, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_24 = paddle._C_ops.add(scale_4, scale_1) + del scale_4 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_21 = paddle._C_ops.matmul(softmax_2, transpose_10, False, False) + del softmax_2, transpose_10 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_26 = paddle._C_ops.add(layer_norm_12, add_25) + del add_25, layer_norm_12 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_2 = paddle._C_ops.gelu(add_27, False) + del add_27 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_24 = paddle._C_ops.matmul(gelu_2, parameter_4, False, False) + del gelu_2 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_29 = paddle._C_ops.add(add_28, layer_norm_15) + del add_28, layer_norm_15 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_29 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_4) + del add_30 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_4) + del add_31 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_4) + del add_32 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_28, full_2, float("0"), True) + del matmul_28 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_33 = paddle._C_ops.add(scale_5, scale_1) + del scale_5 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_29 = paddle._C_ops.matmul(softmax_3, transpose_14, False, False) + del softmax_3, transpose_14 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del transpose_15 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del reshape_15 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_35 = paddle._C_ops.add(layer_norm_18, add_34) + del add_34, layer_norm_18 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_3 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_32 = paddle._C_ops.matmul(gelu_3, parameter_4, False, False) + del gelu_3 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_38 = paddle._C_ops.add(add_37, layer_norm_21) + del add_37, layer_norm_21 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_15) + del matmul_33 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_13) + del matmul_34 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_41 = paddle._C_ops.add(matmul_35, parameter_11) + del matmul_35 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_39, full_int_array_4) + del add_39 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_40, full_int_array_4) + del add_40 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_41, full_int_array_4) + del add_41 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_36 = paddle._C_ops.matmul(transpose_16, transpose_17, False, True) + del transpose_16, transpose_17 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_6 = paddle._C_ops.scale(matmul_36, full_2, float("0"), True) + del matmul_36 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_42 = paddle._C_ops.add(scale_6, scale_1) + del scale_6 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_37 = paddle._C_ops.matmul(softmax_4, transpose_18, False, False) + del softmax_4, transpose_18 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_5) + del transpose_19 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_10, False, False) + del reshape_19 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_43 = paddle._C_ops.add(matmul_38, parameter_9) + del matmul_38 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_44 = paddle._C_ops.add(layer_norm_24, add_43) + del add_43, layer_norm_24 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_44, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_44 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_27, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_5) + del matmul_39 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_4 = paddle._C_ops.gelu(add_45, False) + del add_45 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_40 = paddle._C_ops.matmul(gelu_4, parameter_4, False, False) + del gelu_4 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_46 = paddle._C_ops.add(matmul_40, parameter_3) + del matmul_40 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_47 = paddle._C_ops.add(add_46, layer_norm_27) + del add_46, layer_norm_27 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_47, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_47 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_15) + del matmul_41, parameter_15 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_13) + del matmul_42, parameter_13 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_30, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_50 = paddle._C_ops.add(matmul_43, parameter_11) + del matmul_43, parameter_11 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_48, full_int_array_4) + del add_48 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_49, full_int_array_4) + del add_49 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_50, full_int_array_4) + del add_50, full_int_array_4 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_44 = paddle._C_ops.matmul(transpose_20, transpose_21, False, True) + del transpose_20, transpose_21 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_7 = paddle._C_ops.scale(matmul_44, full_2, float("0"), True) + del full_2, matmul_44 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_51 = paddle._C_ops.add(scale_7, scale_1) + del scale_1, scale_7 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_51, -1) + del add_51 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_45 = paddle._C_ops.matmul(softmax_5, transpose_22, False, False) + del softmax_5, transpose_22 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_5) + del full_int_array_5, transpose_23 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_10, False, False) + del parameter_10, reshape_23 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_52 = paddle._C_ops.add(matmul_46, parameter_9) + del matmul_46, parameter_9 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_53 = paddle._C_ops.add(layer_norm_30, add_52) + del add_52, layer_norm_30 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_53, parameter_7, parameter_8 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_5) + del matmul_47, parameter_5 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_5 = paddle._C_ops.gelu(add_54, False) + del add_54 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_48 = paddle._C_ops.matmul(gelu_5, parameter_4, False, False) + del gelu_5, parameter_4 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_55 = paddle._C_ops.add(matmul_48, parameter_3) + del matmul_48, parameter_3 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_56 = paddle._C_ops.add(add_55, layer_norm_33) + del add_55, layer_norm_33 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_56, parameter_17, parameter_18 + + # pd_op.slice: (1x384xf32) <- (1x11x384xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_36, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x384xf32) <- (1x384xf32, 384x384xf32) + matmul_49 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x384xf32) <- (1x384xf32, 384xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_1) + del matmul_49, parameter_1 + + # pd_op.tanh: (1x384xf32) <- (1x384xf32) + tanh_0 = paddle._C_ops.tanh(add_57) + del add_57, layer_norm_36 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-chinese-small/weight_meta.py b/paddle_samples/PaddleNLP/albert-chinese-small/weight_meta.py new file mode 100644 index 000000000..a92a1fce1 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-small/weight_meta.py @@ -0,0 +1,237 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0920974") + max_val = float("0.0898767") + mean = float("2.2093e-05") + std = float("0.0200502") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1536, 384] + dtype = "float32" + min_val = float("-0.0904018") + max_val = float("0.0969833") + mean = float("-3.42421e-05") + std = float("0.0199846") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [1536] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [384, 1536] + dtype = "float32" + min_val = float("-0.101128") + max_val = float("0.0953021") + mean = float("-3.2029e-05") + std = float("0.0200196") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [384] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + std = float("5.96046e-08") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.090305") + max_val = float("0.0853635") + mean = float("-3.53809e-05") + std = float("0.0199774") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0886189") + max_val = float("0.0784958") + mean = float("6.89041e-05") + std = float("0.0199391") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.085532") + max_val = float("0.0893973") + mean = float("1.21036e-05") + std = float("0.0199489") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.082137") + max_val = float("0.0837528") + mean = float("3.60444e-05") + std = float("0.0199884") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [384] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + std = float("5.96046e-08") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 384] + dtype = "float32" + min_val = float("-0.0793836") + max_val = float("0.0805083") + mean = float("0.000188609") + std = float("0.0199533") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0491545") + max_val = float("0.0631497") + mean = float("0.000771939") + std = float("0.0196944") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0755785") + max_val = float("0.0787668") + mean = float("9.04328e-06") + std = float("0.0199793") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [21128, 128] + dtype = "float32" + min_val = float("-0.0980633") + max_val = float("0.0982526") + mean = float("-7.01566e-06") + std = float("0.0200018") + data = None diff --git a/paddle_samples/PaddleNLP/albert-chinese-tiny/graph_net.json b/paddle_samples/PaddleNLP/albert-chinese-tiny/graph_net.json new file mode 100644 index 000000000..d83669f6d --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-tiny/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "albert-chinese-tiny", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-chinese-tiny/input_meta.py b/paddle_samples/PaddleNLP/albert-chinese-tiny/input_meta.py new file mode 100644 index 000000000..3708564f7 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-tiny/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3614, 6816, 886, 4500, 4636, 2428, 7607, 3444, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-chinese-tiny/model.py b/paddle_samples/PaddleNLP/albert-chinese-tiny/model.py new file mode 100644 index 000000000..558af36b2 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-tiny/model.py @@ -0,0 +1,662 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x11xi64) <- (1x11xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x11xi64) <- (1x1x11xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x11xf32) <- (1x1x1x11xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [11] + + # pd_op.slice: (1x11xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 21128x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x11x128xf32, 1x11xf32, 1x11xf32) <- (1x11x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x128xf32, 128x312xf32) + matmul_0 = paddle._C_ops.matmul(layer_norm_0, parameter_20, False, False) + del layer_norm_0, parameter_20 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 11, 12, 26] + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x26xf32, 1x12x11x26xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.196116"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_2, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.matmul: (1x12x11x26xf32) <- (1x12x11x11xf32, 1x12x11x26xf32) + matmul_5 = paddle._C_ops.matmul(softmax_0, transpose_2, False, False) + del softmax_0, transpose_2 + + # pd_op.transpose: (1x11x12x26xf32) <- (1x12x11x26xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x11x312xf32) <- (1x11x12x26xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_8 = paddle._C_ops.add(add_2, add_7) + del add_2, add_7 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x11x1248xf32) <- (1x11x312xf32, 312x1248xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x11x1248xf32) <- (1x11x1248xf32, 1248xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.gelu: (1x11x1248xf32) <- (1x11x1248xf32) + gelu_0 = paddle._C_ops.gelu(add_9, False) + del add_9 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x1248xf32, 1248x312xf32) + matmul_8 = paddle._C_ops.matmul(gelu_0, parameter_4, False, False) + del gelu_0 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_3) + del matmul_8 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_11 = paddle._C_ops.add(add_10, layer_norm_3) + del add_10, layer_norm_3 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_13) + del matmul_10 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_4) + del add_12 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_4) + del add_14 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x26xf32, 1x12x11x26xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_3 = paddle._C_ops.scale(matmul_12, full_2, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_15 = paddle._C_ops.add(scale_3, scale_1) + del scale_3 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.matmul: (1x12x11x26xf32) <- (1x12x11x11xf32, 1x12x11x26xf32) + matmul_13 = paddle._C_ops.matmul(softmax_1, transpose_6, False, False) + del softmax_1, transpose_6 + + # pd_op.transpose: (1x11x12x26xf32) <- (1x12x11x26xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x11x312xf32) <- (1x11x12x26xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_17 = paddle._C_ops.add(layer_norm_6, add_16) + del add_16, layer_norm_6 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17 + + # pd_op.matmul: (1x11x1248xf32) <- (1x11x312xf32, 312x1248xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x11x1248xf32) <- (1x11x1248xf32, 1248xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.gelu: (1x11x1248xf32) <- (1x11x1248xf32) + gelu_1 = paddle._C_ops.gelu(add_18, False) + del add_18 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x1248xf32, 1248x312xf32) + matmul_16 = paddle._C_ops.matmul(gelu_1, parameter_4, False, False) + del gelu_1 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_20 = paddle._C_ops.add(add_19, layer_norm_9) + del add_19, layer_norm_9 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_4) + del add_21 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_4) + del add_22 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x26xf32, 1x12x11x26xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_4 = paddle._C_ops.scale(matmul_20, full_2, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_24 = paddle._C_ops.add(scale_4, scale_1) + del scale_4 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.matmul: (1x12x11x26xf32) <- (1x12x11x11xf32, 1x12x11x26xf32) + matmul_21 = paddle._C_ops.matmul(softmax_2, transpose_10, False, False) + del softmax_2, transpose_10 + + # pd_op.transpose: (1x11x12x26xf32) <- (1x12x11x26xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x11x312xf32) <- (1x11x12x26xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_26 = paddle._C_ops.add(layer_norm_12, add_25) + del add_25, layer_norm_12 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26 + + # pd_op.matmul: (1x11x1248xf32) <- (1x11x312xf32, 312x1248xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x11x1248xf32) <- (1x11x1248xf32, 1248xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.gelu: (1x11x1248xf32) <- (1x11x1248xf32) + gelu_2 = paddle._C_ops.gelu(add_27, False) + del add_27 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x1248xf32, 1248x312xf32) + matmul_24 = paddle._C_ops.matmul(gelu_2, parameter_4, False, False) + del gelu_2 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_29 = paddle._C_ops.add(add_28, layer_norm_15) + del add_28, layer_norm_15 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_29 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25, parameter_15 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26, parameter_13 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27, parameter_11 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_4) + del add_30 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_4) + del add_31 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_4) + del add_32, full_int_array_4 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x26xf32, 1x12x11x26xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_28, full_2, float("0"), True) + del full_2, matmul_28 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_33 = paddle._C_ops.add(scale_5, scale_1) + del scale_1, scale_5 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (1x12x11x26xf32) <- (1x12x11x11xf32, 1x12x11x26xf32) + matmul_29 = paddle._C_ops.matmul(softmax_3, transpose_14, False, False) + del softmax_3, transpose_14 + + # pd_op.transpose: (1x11x12x26xf32) <- (1x12x11x26xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x11x312xf32) <- (1x11x12x26xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del full_int_array_5, transpose_15 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del parameter_10, reshape_15 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30, parameter_9 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_35 = paddle._C_ops.add(layer_norm_18, add_34) + del add_34, layer_norm_18 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35, parameter_7, parameter_8 + + # pd_op.matmul: (1x11x1248xf32) <- (1x11x312xf32, 312x1248xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x11x1248xf32) <- (1x11x1248xf32, 1248xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31, parameter_5 + + # pd_op.gelu: (1x11x1248xf32) <- (1x11x1248xf32) + gelu_3 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x1248xf32, 1248x312xf32) + matmul_32 = paddle._C_ops.matmul(gelu_3, parameter_4, False, False) + del gelu_3, parameter_4 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32, parameter_3 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_38 = paddle._C_ops.add(add_37, layer_norm_21) + del add_37, layer_norm_21 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38, parameter_17, parameter_18 + + # pd_op.slice: (1x312xf32) <- (1x11x312xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_24, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x312xf32) <- (1x312xf32, 312x312xf32) + matmul_33 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x312xf32) <- (1x312xf32, 312xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_1) + del matmul_33, parameter_1 + + # pd_op.tanh: (1x312xf32) <- (1x312xf32) + tanh_0 = paddle._C_ops.tanh(add_39) + del add_39, layer_norm_24 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-chinese-tiny/weight_meta.py b/paddle_samples/PaddleNLP/albert-chinese-tiny/weight_meta.py new file mode 100644 index 000000000..3bac8dae2 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-tiny/weight_meta.py @@ -0,0 +1,235 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0943927") + max_val = float("0.0805598") + mean = float("-5.04225e-05") + std = float("0.0199894") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1248, 312] + dtype = "float32" + min_val = float("-0.0882163") + max_val = float("0.0910547") + mean = float("-7.22546e-06") + std = float("0.0199967") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [1248] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [312, 1248] + dtype = "float32" + min_val = float("-0.089193") + max_val = float("0.10013") + mean = float("-5.28496e-05") + std = float("0.0199953") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [312] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0864897") + max_val = float("0.0923653") + mean = float("6.8981e-05") + std = float("0.0200065") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0912581") + max_val = float("0.0870574") + mean = float("-4.12729e-05") + std = float("0.0200247") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0832851") + max_val = float("0.0934653") + mean = float("-2.10013e-05") + std = float("0.0200296") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0901047") + max_val = float("0.0851487") + mean = float("-2.36235e-06") + std = float("0.020018") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [312] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 312] + dtype = "float32" + min_val = float("-0.0820946") + max_val = float("0.0974006") + mean = float("-5.19528e-05") + std = float("0.0200838") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0599234") + max_val = float("0.066722") + mean = float("-0.00042005") + std = float("0.0224804") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0834669") + max_val = float("0.0840402") + mean = float("0.000142117") + std = float("0.0199409") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [21128, 128] + dtype = "float32" + min_val = float("-0.0938932") + max_val = float("0.105185") + mean = float("1.19757e-05") + std = float("0.0199975") + data = None diff --git a/paddle_samples/PaddleNLP/t5-small/graph_net.json b/paddle_samples/PaddleNLP/t5-small/graph_net.json new file mode 100644 index 000000000..6b649b3dd --- /dev/null +++ b/paddle_samples/PaddleNLP/t5-small/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "t5-small", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/t5-small/input_meta.py b/paddle_samples/PaddleNLP/t5-small/input_meta.py new file mode 100644 index 000000000..846bab065 --- /dev/null +++ b/paddle_samples/PaddleNLP/t5-small/input_meta.py @@ -0,0 +1,40 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 20] + dtype = "int64" + data = [ + 8774, + 6, + 82, + 564, + 19, + 5762, + 5, + 27, + 183, + 1036, + 81, + 508, + 1612, + 2250, + 11, + 70, + 4648, + 7, + 5, + 1, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 20] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 1] + dtype = "int64" + data = [0] diff --git a/paddle_samples/PaddleNLP/t5-small/model.py b/paddle_samples/PaddleNLP/t5-small/model.py new file mode 100644 index 000000000..29476168f --- /dev/null +++ b/paddle_samples/PaddleNLP/t5-small/model.py @@ -0,0 +1,3317 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + data_0, + data_1, + data_2, + ): + # pd_op.embedding: (1x20x512xf32) <- (1x20xi64, 32128x512xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_130, -1, False) + del data_0 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x20xi64) <- (1x20xi64, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.cast: (1x1x1x20xf32) <- (1x1x1x20xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_0, paddle.float32) + del unsqueeze_0 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x20xf32) <- (1x1x1x20xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x20xf32) <- (1x1x1x20xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del scale_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_0 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_0 = paddle._C_ops.pow(dropout_0, float("2")) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_0 = paddle._C_ops.mean(pow_0, full_int_array_1, True) + del pow_0 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_2 = paddle._C_ops.scale(mean_0, full_3, float("1e-06"), True) + del mean_0 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_0 = paddle._C_ops.rsqrt(scale_2) + del scale_2 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_0 = paddle._C_ops.multiply(dropout_0, rsqrt_0) + del rsqrt_0 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_1 = paddle._C_ops.multiply(parameter_124, multiply_0) + del multiply_0, parameter_124 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_1 = paddle._C_ops.matmul(multiply_1, parameter_129, False, False) + del parameter_129 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [1, -1, 8, 64] + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_1, full_int_array_2) + del matmul_1 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_2 = paddle._C_ops.matmul(multiply_1, parameter_128, False, False) + del parameter_128 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_2, full_int_array_2) + del matmul_2 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_3 = paddle._C_ops.matmul(multiply_1, parameter_127, False, False) + del multiply_1, parameter_127 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_3, full_int_array_2) + del matmul_3 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf64) <- () + full_4 = paddle._C_ops.full( + [1], float("0"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_5 = paddle._C_ops.full( + [1], float("20"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_6 = paddle._C_ops.full( + [1], float("1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (20xi64) <- (1xf64, 1xf64, 1xf64) + arange_0 = paddle.arange(full_4, full_5, full_6, dtype="int64") + del full_5 + + # pd_op.unsqueeze: (20x1xi64) <- (20xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(arange_0, full_int_array_1) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [0] + + # pd_op.unsqueeze: (1x20xi64) <- (20xi64, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(arange_0, full_int_array_3) + del arange_0 + + # pd_op.subtract: (20x20xi64) <- (1x20xi64, 20x1xi64) + subtract_0 = paddle._C_ops.subtract(unsqueeze_2, unsqueeze_1) + del unsqueeze_1, unsqueeze_2 + + # pd_op.full: (xi64) <- () + full_7 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.greater_than: (20x20xb) <- (20x20xi64, xi64) + greater_than_0 = paddle._C_ops.greater_than(subtract_0, full_7) + del full_7 + + # pd_op.cast: (20x20xi64) <- (20x20xb) + cast_1 = paddle._C_ops.cast(greater_than_0, paddle.int64) + del greater_than_0 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("16"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (20x20xi64) <- (20x20xi64, 1xf32) + scale_3 = paddle._C_ops.scale(cast_1, full_8, float("0"), True) + del cast_1 + + # pd_op.scale: (20x20xi64) <- (20x20xi64, 1xf32) + scale_4 = paddle._C_ops.scale(scale_3, full_3, float("0"), True) + del scale_3 + + # pd_op.abs: (20x20xi64) <- (20x20xi64) + abs_0 = paddle._C_ops.abs(subtract_0) + del subtract_0 + + # pd_op.full: (xi64) <- () + full_9 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.less_than: (20x20xb) <- (20x20xi64, xi64) + less_than_0 = paddle._C_ops.less_than(abs_0, full_9) + del full_9 + + # pd_op.cast: (20x20xf32) <- (20x20xi64) + cast_2 = paddle._C_ops.cast(abs_0, paddle.float32) + + # pd_op.full: (1xf32) <- () + full_10 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (20x20xf32) <- (20x20xf32, 1xf32) + scale_5 = paddle._C_ops.scale(cast_2, full_10, float("0"), True) + del cast_2, full_10 + + # pd_op.log: (20x20xf32) <- (20x20xf32) + log_0 = paddle._C_ops.log(scale_5) + del scale_5 + + # pd_op.full: (1xf32) <- () + full_11 = paddle._C_ops.full( + [1], float("0.360674"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (20x20xf32) <- (20x20xf32, 1xf32) + scale_6 = paddle._C_ops.scale(log_0, full_11, float("0"), True) + del full_11, log_0 + + # pd_op.full: (1xf32) <- () + full_12 = paddle._C_ops.full( + [1], float("8"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (20x20xf32) <- (20x20xf32, 1xf32) + scale_7 = paddle._C_ops.scale(scale_6, full_12, float("0"), True) + del full_12, scale_6 + + # pd_op.cast: (20x20xi64) <- (20x20xf32) + cast_3 = paddle._C_ops.cast(scale_7, paddle.int64) + del scale_7 + + # pd_op.scale: (20x20xi64) <- (20x20xi64, 1xf32) + scale_8 = paddle._C_ops.scale(cast_3, full_3, float("8"), True) + del cast_3 + + # pd_op.full: (1xf32) <- () + full_13 = paddle._C_ops.full( + [1], float("15"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (20x20xi64) <- (20x20xi64, 1xf32) + full_like_0 = paddle._C_ops.full_like( + scale_8, full_13, paddle.int64, paddle.framework._current_expected_place() + ) + del full_13 + + # pd_op.minimum: (20x20xi64) <- (20x20xi64, 20x20xi64) + minimum_0 = paddle._C_ops.minimum(scale_8, full_like_0) + del full_like_0, scale_8 + + # pd_op.where: (20x20xi64) <- (20x20xb, 20x20xi64, 20x20xi64) + where_0 = paddle._C_ops.where(less_than_0, abs_0, minimum_0) + del abs_0, less_than_0, minimum_0 + + # pd_op.add: (20x20xi64) <- (20x20xi64, 20x20xi64) + add_0 = paddle._C_ops.add(scale_4, where_0) + del scale_4, where_0 + + # pd_op.embedding: (20x20x8xf32) <- (20x20xi64, 32x8xf32) + embedding_1 = paddle._C_ops.embedding(add_0, parameter_125, -1, False) + del add_0, parameter_125 + + # pd_op.transpose: (8x20x20xf32) <- (20x20x8xf32) + transpose_3 = paddle._C_ops.transpose(embedding_1, [2, 0, 1]) + del embedding_1 + + # pd_op.unsqueeze: (1x8x20x20xf32) <- (8x20x20xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_3, full_int_array_3) + del transpose_3 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x1x1x20xf32) + add_1 = paddle._C_ops.add(unsqueeze_3, scale_1) + del unsqueeze_3 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_2 = paddle._C_ops.add(matmul_4, add_1) + del matmul_4 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_0 = paddle._C_ops.softmax(add_2, -1) + del add_2 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_5 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_4 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [1, -1, 512] + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_4, full_int_array_4) + del transpose_4 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_126, False, False) + del parameter_126, reshape_3 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_6, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_6 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_3 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_1 = paddle._C_ops.pow(add_3, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_1 = paddle._C_ops.mean(pow_1, full_int_array_1, True) + del pow_1 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_9 = paddle._C_ops.scale(mean_1, full_3, float("1e-06"), True) + del mean_1 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_1 = paddle._C_ops.rsqrt(scale_9) + del scale_9 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_2 = paddle._C_ops.multiply(add_3, rsqrt_1) + del rsqrt_1 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_3 = paddle._C_ops.multiply(parameter_121, multiply_2) + del multiply_2, parameter_121 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_7 = paddle._C_ops.matmul(multiply_3, parameter_123, False, False) + del multiply_3, parameter_123 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_0 = paddle._C_ops.relu(matmul_7) + del matmul_7 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_0 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_8 = paddle._C_ops.matmul(dropout_6, parameter_122, False, False) + del dropout_6, parameter_122 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_8, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_8 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_4 = paddle._C_ops.add(dropout_8, add_3) + del add_3, dropout_8 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_2 = paddle._C_ops.pow(add_4, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_2 = paddle._C_ops.mean(pow_2, full_int_array_1, True) + del pow_2 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_10 = paddle._C_ops.scale(mean_2, full_3, float("1e-06"), True) + del mean_2 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_2 = paddle._C_ops.rsqrt(scale_10) + del scale_10 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_4 = paddle._C_ops.multiply(add_4, rsqrt_2) + del rsqrt_2 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_5 = paddle._C_ops.multiply(parameter_116, multiply_4) + del multiply_4, parameter_116 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_9 = paddle._C_ops.matmul(multiply_5, parameter_120, False, False) + del parameter_120 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(matmul_9, full_int_array_2) + del matmul_9 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_10 = paddle._C_ops.matmul(multiply_5, parameter_119, False, False) + del parameter_119 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(matmul_10, full_int_array_2) + del matmul_10 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_11 = paddle._C_ops.matmul(multiply_5, parameter_118, False, False) + del multiply_5, parameter_118 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(matmul_11, full_int_array_2) + del matmul_11 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_7 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_12 = paddle._C_ops.matmul(transpose_5, transpose_6, False, True) + del transpose_5, transpose_6 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_5 = paddle._C_ops.add(matmul_12, add_1) + del matmul_12 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_1 = paddle._C_ops.softmax(add_5, -1) + del add_5 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_13 = paddle._C_ops.matmul(dropout_10, transpose_7, False, False) + del dropout_10, transpose_7 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_8 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_8, full_int_array_4) + del transpose_8 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_117, False, False) + del parameter_117, reshape_7 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_14, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_14 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_6 = paddle._C_ops.add(add_4, dropout_12) + del add_4, dropout_12 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_3 = paddle._C_ops.pow(add_6, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_3 = paddle._C_ops.mean(pow_3, full_int_array_1, True) + del pow_3 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_11 = paddle._C_ops.scale(mean_3, full_3, float("1e-06"), True) + del mean_3 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_3 = paddle._C_ops.rsqrt(scale_11) + del scale_11 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_6 = paddle._C_ops.multiply(add_6, rsqrt_3) + del rsqrt_3 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_7 = paddle._C_ops.multiply(parameter_113, multiply_6) + del multiply_6, parameter_113 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_15 = paddle._C_ops.matmul(multiply_7, parameter_115, False, False) + del multiply_7, parameter_115 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_1 = paddle._C_ops.relu(matmul_15) + del matmul_15 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_1, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_1 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_16 = paddle._C_ops.matmul(dropout_14, parameter_114, False, False) + del dropout_14, parameter_114 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_16, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_16 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_7 = paddle._C_ops.add(dropout_16, add_6) + del add_6, dropout_16 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_4 = paddle._C_ops.pow(add_7, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_4 = paddle._C_ops.mean(pow_4, full_int_array_1, True) + del pow_4 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_12 = paddle._C_ops.scale(mean_4, full_3, float("1e-06"), True) + del mean_4 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_4 = paddle._C_ops.rsqrt(scale_12) + del scale_12 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_8 = paddle._C_ops.multiply(add_7, rsqrt_4) + del rsqrt_4 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_9 = paddle._C_ops.multiply(parameter_108, multiply_8) + del multiply_8, parameter_108 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_17 = paddle._C_ops.matmul(multiply_9, parameter_112, False, False) + del parameter_112 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_17, full_int_array_2) + del matmul_17 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_18 = paddle._C_ops.matmul(multiply_9, parameter_111, False, False) + del parameter_111 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_18, full_int_array_2) + del matmul_18 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_19 = paddle._C_ops.matmul(multiply_9, parameter_110, False, False) + del multiply_9, parameter_110 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_19, full_int_array_2) + del matmul_19 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_11 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_20 = paddle._C_ops.matmul(transpose_9, transpose_10, False, True) + del transpose_10, transpose_9 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_8 = paddle._C_ops.add(matmul_20, add_1) + del matmul_20 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_2 = paddle._C_ops.softmax(add_8, -1) + del add_8 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_21 = paddle._C_ops.matmul(dropout_18, transpose_11, False, False) + del dropout_18, transpose_11 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_12 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_12, full_int_array_4) + del transpose_12 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_109, False, False) + del parameter_109, reshape_11 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_22, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_22 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_9 = paddle._C_ops.add(add_7, dropout_20) + del add_7, dropout_20 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_5 = paddle._C_ops.pow(add_9, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_5 = paddle._C_ops.mean(pow_5, full_int_array_1, True) + del pow_5 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_13 = paddle._C_ops.scale(mean_5, full_3, float("1e-06"), True) + del mean_5 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_5 = paddle._C_ops.rsqrt(scale_13) + del scale_13 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_10 = paddle._C_ops.multiply(add_9, rsqrt_5) + del rsqrt_5 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_11 = paddle._C_ops.multiply(parameter_105, multiply_10) + del multiply_10, parameter_105 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_23 = paddle._C_ops.matmul(multiply_11, parameter_107, False, False) + del multiply_11, parameter_107 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_2 = paddle._C_ops.relu(matmul_23) + del matmul_23 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_2, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_2 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_24 = paddle._C_ops.matmul(dropout_22, parameter_106, False, False) + del dropout_22, parameter_106 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_24, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_24 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_10 = paddle._C_ops.add(dropout_24, add_9) + del add_9, dropout_24 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_6 = paddle._C_ops.pow(add_10, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_6 = paddle._C_ops.mean(pow_6, full_int_array_1, True) + del pow_6 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_14 = paddle._C_ops.scale(mean_6, full_3, float("1e-06"), True) + del mean_6 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_6 = paddle._C_ops.rsqrt(scale_14) + del scale_14 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_12 = paddle._C_ops.multiply(add_10, rsqrt_6) + del rsqrt_6 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_13 = paddle._C_ops.multiply(parameter_100, multiply_12) + del multiply_12, parameter_100 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_25 = paddle._C_ops.matmul(multiply_13, parameter_104, False, False) + del parameter_104 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(matmul_25, full_int_array_2) + del matmul_25 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_26 = paddle._C_ops.matmul(multiply_13, parameter_103, False, False) + del parameter_103 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(matmul_26, full_int_array_2) + del matmul_26 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_27 = paddle._C_ops.matmul(multiply_13, parameter_102, False, False) + del multiply_13, parameter_102 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_27, full_int_array_2) + del matmul_27 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_15 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_28 = paddle._C_ops.matmul(transpose_13, transpose_14, False, True) + del transpose_13, transpose_14 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_11 = paddle._C_ops.add(matmul_28, add_1) + del matmul_28 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_3 = paddle._C_ops.softmax(add_11, -1) + del add_11 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_29 = paddle._C_ops.matmul(dropout_26, transpose_15, False, False) + del dropout_26, transpose_15 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_16 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_16, full_int_array_4) + del transpose_16 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_101, False, False) + del parameter_101, reshape_15 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_30, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_30 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_12 = paddle._C_ops.add(add_10, dropout_28) + del add_10, dropout_28 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_7 = paddle._C_ops.pow(add_12, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_7 = paddle._C_ops.mean(pow_7, full_int_array_1, True) + del pow_7 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_15 = paddle._C_ops.scale(mean_7, full_3, float("1e-06"), True) + del mean_7 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_7 = paddle._C_ops.rsqrt(scale_15) + del scale_15 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_14 = paddle._C_ops.multiply(add_12, rsqrt_7) + del rsqrt_7 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_15 = paddle._C_ops.multiply(parameter_97, multiply_14) + del multiply_14, parameter_97 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_31 = paddle._C_ops.matmul(multiply_15, parameter_99, False, False) + del multiply_15, parameter_99 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_3 = paddle._C_ops.relu(matmul_31) + del matmul_31 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_3, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_3 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_32 = paddle._C_ops.matmul(dropout_30, parameter_98, False, False) + del dropout_30, parameter_98 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_32, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_32 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_13 = paddle._C_ops.add(dropout_32, add_12) + del add_12, dropout_32 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_8 = paddle._C_ops.pow(add_13, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_8 = paddle._C_ops.mean(pow_8, full_int_array_1, True) + del pow_8 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_16 = paddle._C_ops.scale(mean_8, full_3, float("1e-06"), True) + del mean_8 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_8 = paddle._C_ops.rsqrt(scale_16) + del scale_16 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_16 = paddle._C_ops.multiply(add_13, rsqrt_8) + del rsqrt_8 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_17 = paddle._C_ops.multiply(parameter_92, multiply_16) + del multiply_16, parameter_92 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_33 = paddle._C_ops.matmul(multiply_17, parameter_96, False, False) + del parameter_96 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_33, full_int_array_2) + del matmul_33 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_34 = paddle._C_ops.matmul(multiply_17, parameter_95, False, False) + del parameter_95 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_34, full_int_array_2) + del matmul_34 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_35 = paddle._C_ops.matmul(multiply_17, parameter_94, False, False) + del multiply_17, parameter_94 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(matmul_35, full_int_array_2) + del matmul_35 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_19 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_36 = paddle._C_ops.matmul(transpose_17, transpose_18, False, True) + del transpose_17, transpose_18 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_14 = paddle._C_ops.add(matmul_36, add_1) + del matmul_36 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_4 = paddle._C_ops.softmax(add_14, -1) + del add_14 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_37 = paddle._C_ops.matmul(dropout_34, transpose_19, False, False) + del dropout_34, transpose_19 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_20 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_20, full_int_array_4) + del transpose_20 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_93, False, False) + del parameter_93, reshape_19 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_38, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_38 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_15 = paddle._C_ops.add(add_13, dropout_36) + del add_13, dropout_36 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_9 = paddle._C_ops.pow(add_15, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_9 = paddle._C_ops.mean(pow_9, full_int_array_1, True) + del pow_9 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_17 = paddle._C_ops.scale(mean_9, full_3, float("1e-06"), True) + del mean_9 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_9 = paddle._C_ops.rsqrt(scale_17) + del scale_17 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_18 = paddle._C_ops.multiply(add_15, rsqrt_9) + del rsqrt_9 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_19 = paddle._C_ops.multiply(parameter_89, multiply_18) + del multiply_18, parameter_89 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_39 = paddle._C_ops.matmul(multiply_19, parameter_91, False, False) + del multiply_19, parameter_91 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_4 = paddle._C_ops.relu(matmul_39) + del matmul_39 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_4, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_4 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_40 = paddle._C_ops.matmul(dropout_38, parameter_90, False, False) + del dropout_38, parameter_90 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_40, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_40 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_16 = paddle._C_ops.add(dropout_40, add_15) + del add_15, dropout_40 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_10 = paddle._C_ops.pow(add_16, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_10 = paddle._C_ops.mean(pow_10, full_int_array_1, True) + del pow_10 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_18 = paddle._C_ops.scale(mean_10, full_3, float("1e-06"), True) + del mean_10 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_10 = paddle._C_ops.rsqrt(scale_18) + del scale_18 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_20 = paddle._C_ops.multiply(add_16, rsqrt_10) + del rsqrt_10 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_21 = paddle._C_ops.multiply(parameter_84, multiply_20) + del multiply_20, parameter_84 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_41 = paddle._C_ops.matmul(multiply_21, parameter_88, False, False) + del parameter_88 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(matmul_41, full_int_array_2) + del matmul_41 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_42 = paddle._C_ops.matmul(multiply_21, parameter_87, False, False) + del parameter_87 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_42, full_int_array_2) + del matmul_42 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_43 = paddle._C_ops.matmul(multiply_21, parameter_86, False, False) + del multiply_21, parameter_86 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_43, full_int_array_2) + del matmul_43 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_23 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_44 = paddle._C_ops.matmul(transpose_21, transpose_22, False, True) + del transpose_21, transpose_22 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_17 = paddle._C_ops.add(matmul_44, add_1) + del add_1, matmul_44 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_5 = paddle._C_ops.softmax(add_17, -1) + del add_17 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_45 = paddle._C_ops.matmul(dropout_42, transpose_23, False, False) + del dropout_42, transpose_23 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_24 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_24, full_int_array_4) + del transpose_24 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_85, False, False) + del parameter_85, reshape_23 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_46, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_46 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_18 = paddle._C_ops.add(add_16, dropout_44) + del add_16, dropout_44 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_11 = paddle._C_ops.pow(add_18, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_11 = paddle._C_ops.mean(pow_11, full_int_array_1, True) + del pow_11 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_19 = paddle._C_ops.scale(mean_11, full_3, float("1e-06"), True) + del mean_11 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_11 = paddle._C_ops.rsqrt(scale_19) + del scale_19 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_22 = paddle._C_ops.multiply(add_18, rsqrt_11) + del rsqrt_11 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_23 = paddle._C_ops.multiply(parameter_81, multiply_22) + del multiply_22, parameter_81 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_47 = paddle._C_ops.matmul(multiply_23, parameter_83, False, False) + del multiply_23, parameter_83 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_5 = paddle._C_ops.relu(matmul_47) + del matmul_47 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_5, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_5 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_48 = paddle._C_ops.matmul(dropout_46, parameter_82, False, False) + del dropout_46, parameter_82 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_48, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_48 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_19 = paddle._C_ops.add(dropout_48, add_18) + del add_18, dropout_48 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_12 = paddle._C_ops.pow(add_19, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_12 = paddle._C_ops.mean(pow_12, full_int_array_1, True) + del pow_12 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_20 = paddle._C_ops.scale(mean_12, full_3, float("1e-06"), True) + del mean_12 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_12 = paddle._C_ops.rsqrt(scale_20) + del scale_20 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_24 = paddle._C_ops.multiply(add_19, rsqrt_12) + del add_19, rsqrt_12 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_25 = paddle._C_ops.multiply(parameter_80, multiply_24) + del multiply_24, parameter_80 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + multiply_25, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del multiply_25 + + # pd_op.embedding: (1x1x512xf32) <- (1x1xi64, 32128x512xf32) + embedding_2 = paddle._C_ops.embedding(data_2, parameter_130, -1, False) + del data_2 + + # pd_op.full: (1x1xf32) <- () + full_14 = paddle._C_ops.full( + [1, 1], + float("1"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.arange: (1xi64) <- (1xf64, 1xf64, 1xf64) + arange_1 = paddle.arange(full_4, full_6, full_6, dtype="int64") + del full_4, full_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [0, 1] + + # pd_op.unsqueeze: (1x1x1xi64) <- (1xi64, 2xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(arange_1, full_int_array_5) + del full_int_array_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_6 = [1, 1, 1] + + # pd_op.tile: (1x1x1xi64) <- (1x1x1xi64, 3xi64) + tile_0 = paddle._C_ops.tile(unsqueeze_4, full_int_array_6) + del full_int_array_6, unsqueeze_4 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_7 = [0, 2] + + # pd_op.unsqueeze: (1x1x1xi64) <- (1xi64, 2xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(arange_1, full_int_array_7) + del full_int_array_7 + + # pd_op.less_equal: (1x1x1xb) <- (1x1x1xi64, 1x1x1xi64) + less_equal_0 = paddle._C_ops.less_equal(tile_0, unsqueeze_5) + del tile_0, unsqueeze_5 + + # pd_op.cast: (1x1x1xf32) <- (1x1x1xb) + cast_4 = paddle._C_ops.cast(less_equal_0, paddle.float32) + del less_equal_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [1] + + # pd_op.unsqueeze: (1x1x1x1xf32) <- (1x1x1xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(cast_4, full_int_array_8) + del cast_4, full_int_array_8 + + # pd_op.unsqueeze: (1x1x1x1xf32) <- (1x1xf32, 2xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(full_14, full_int_array_0) + del full_14, full_int_array_0 + + # pd_op.multiply: (1x1x1x1xf32) <- (1x1x1x1xf32, 1x1x1x1xf32) + multiply_26 = paddle._C_ops.multiply(unsqueeze_6, unsqueeze_7) + del unsqueeze_6, unsqueeze_7 + + # pd_op.scale: (1x1x1x1xf32) <- (1x1x1x1xf32, 1xf32) + scale_21 = paddle._C_ops.scale(multiply_26, full_0, float("1"), True) + del multiply_26 + + # pd_op.scale: (1x1x1x1xf32) <- (1x1x1x1xf32, 1xf32) + scale_22 = paddle._C_ops.scale(scale_21, full_1, float("0"), True) + del full_1, scale_21 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_2, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_2 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_13 = paddle._C_ops.pow(dropout_52, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_13 = paddle._C_ops.mean(pow_13, full_int_array_1, True) + del pow_13 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_23 = paddle._C_ops.scale(mean_13, full_3, float("1e-06"), True) + del mean_13 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_13 = paddle._C_ops.rsqrt(scale_23) + del scale_23 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_27 = paddle._C_ops.multiply(dropout_52, rsqrt_13) + del rsqrt_13 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_28 = paddle._C_ops.multiply(parameter_74, multiply_27) + del multiply_27, parameter_74 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_49 = paddle._C_ops.matmul(multiply_28, parameter_79, False, False) + del parameter_79 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_49, full_int_array_2) + del matmul_49 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_50 = paddle._C_ops.matmul(multiply_28, parameter_78, False, False) + del parameter_78 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(matmul_50, full_int_array_2) + del matmul_50 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_51 = paddle._C_ops.matmul(multiply_28, parameter_77, False, False) + del multiply_28, parameter_77 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(matmul_51, full_int_array_2) + del matmul_51 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_27 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_52 = paddle._C_ops.matmul(transpose_25, transpose_26, False, True) + del transpose_25 + + # pd_op.unsqueeze: (1x1xi64) <- (1xi64, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(arange_1, full_int_array_1) + + # pd_op.unsqueeze: (1x1xi64) <- (1xi64, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(arange_1, full_int_array_3) + del arange_1 + + # pd_op.subtract: (1x1xi64) <- (1x1xi64, 1x1xi64) + subtract_1 = paddle._C_ops.subtract(unsqueeze_9, unsqueeze_8) + del unsqueeze_8, unsqueeze_9 + + # pd_op.full: (1xf32) <- () + full_15 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x1xi64) <- (1x1xi64, 1xf32) + full_like_1 = paddle._C_ops.full_like( + subtract_1, + full_15, + paddle.int64, + paddle.framework._current_expected_place(), + ) + del full_15 + + # pd_op.minimum: (1x1xi64) <- (1x1xi64, 1x1xi64) + minimum_1 = paddle._C_ops.minimum(subtract_1, full_like_1) + del full_like_1, subtract_1 + + # pd_op.scale: (1x1xi64) <- (1x1xi64, 1xf32) + scale_24 = paddle._C_ops.scale(minimum_1, full_0, float("0"), True) + del full_0, minimum_1 + + # pd_op.full: (xi64) <- () + full_16 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.less_than: (1x1xb) <- (1x1xi64, xi64) + less_than_1 = paddle._C_ops.less_than(scale_24, full_16) + del full_16 + + # pd_op.cast: (1x1xf32) <- (1x1xi64) + cast_5 = paddle._C_ops.cast(scale_24, paddle.float32) + + # pd_op.full: (1xf32) <- () + full_17 = paddle._C_ops.full( + [1], float("0.0625"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1xf32) <- (1x1xf32, 1xf32) + scale_25 = paddle._C_ops.scale(cast_5, full_17, float("0"), True) + del cast_5, full_17 + + # pd_op.log: (1x1xf32) <- (1x1xf32) + log_1 = paddle._C_ops.log(scale_25) + del scale_25 + + # pd_op.full: (1xf32) <- () + full_18 = paddle._C_ops.full( + [1], float("0.480898"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1xf32) <- (1x1xf32, 1xf32) + scale_26 = paddle._C_ops.scale(log_1, full_18, float("0"), True) + del full_18, log_1 + + # pd_op.scale: (1x1xf32) <- (1x1xf32, 1xf32) + scale_27 = paddle._C_ops.scale(scale_26, full_8, float("0"), True) + del full_8, scale_26 + + # pd_op.cast: (1x1xi64) <- (1x1xf32) + cast_6 = paddle._C_ops.cast(scale_27, paddle.int64) + del scale_27 + + # pd_op.scale: (1x1xi64) <- (1x1xi64, 1xf32) + scale_28 = paddle._C_ops.scale(cast_6, full_3, float("16"), True) + del cast_6 + + # pd_op.full: (1xf32) <- () + full_19 = paddle._C_ops.full( + [1], float("31"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x1xi64) <- (1x1xi64, 1xf32) + full_like_2 = paddle._C_ops.full_like( + scale_28, full_19, paddle.int64, paddle.framework._current_expected_place() + ) + del full_19 + + # pd_op.minimum: (1x1xi64) <- (1x1xi64, 1x1xi64) + minimum_2 = paddle._C_ops.minimum(scale_28, full_like_2) + del full_like_2, scale_28 + + # pd_op.where: (1x1xi64) <- (1x1xb, 1x1xi64, 1x1xi64) + where_1 = paddle._C_ops.where(less_than_1, scale_24, minimum_2) + del less_than_1, minimum_2, scale_24 + + # pd_op.scale: (1x1xi64) <- (1x1xi64, 1xf32) + scale_29 = paddle._C_ops.scale(where_1, full_3, float("0"), True) + del where_1 + + # pd_op.embedding: (1x1x8xf32) <- (1x1xi64, 32x8xf32) + embedding_3 = paddle._C_ops.embedding(scale_29, parameter_75, -1, False) + del parameter_75, scale_29 + + # pd_op.transpose: (8x1x1xf32) <- (1x1x8xf32) + transpose_28 = paddle._C_ops.transpose(embedding_3, [2, 0, 1]) + del embedding_3 + + # pd_op.unsqueeze: (1x8x1x1xf32) <- (8x1x1xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_3) + del full_int_array_3, transpose_28 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x1x1x1xf32) + add_20 = paddle._C_ops.add(unsqueeze_10, scale_22) + del scale_22, unsqueeze_10 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_21 = paddle._C_ops.add(matmul_52, add_20) + del matmul_52 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_6 = paddle._C_ops.softmax(add_21, -1) + del add_21 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_53 = paddle._C_ops.matmul(dropout_54, transpose_27, False, False) + del dropout_54 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_29 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_29, full_int_array_4) + del transpose_29 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_76, False, False) + del parameter_76, reshape_27 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_54, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_54 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_22 = paddle._C_ops.add(dropout_52, dropout_56) + del dropout_52, dropout_56 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_14 = paddle._C_ops.pow(add_22, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_14 = paddle._C_ops.mean(pow_14, full_int_array_1, True) + del pow_14 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_30 = paddle._C_ops.scale(mean_14, full_3, float("1e-06"), True) + del mean_14 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_14 = paddle._C_ops.rsqrt(scale_30) + del scale_30 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_29 = paddle._C_ops.multiply(add_22, rsqrt_14) + del rsqrt_14 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_30 = paddle._C_ops.multiply(parameter_69, multiply_29) + del multiply_29, parameter_69 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_55 = paddle._C_ops.matmul(multiply_30, parameter_73, False, False) + del multiply_30, parameter_73 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_55, full_int_array_2) + del matmul_55 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_56 = paddle._C_ops.matmul(dropout_50, parameter_72, False, False) + del parameter_72 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_56, full_int_array_2) + del matmul_56 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_31 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_57 = paddle._C_ops.matmul(dropout_50, parameter_71, False, False) + del parameter_71 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_57, full_int_array_2) + del matmul_57 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_58 = paddle._C_ops.matmul(transpose_30, transpose_31, False, True) + del transpose_30 + + # pd_op.full: (1x8x1x20xf32) <- () + full_20 = paddle._C_ops.full( + [1, 8, 1, 20], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x1x1x20xf32) + add_23 = paddle._C_ops.add(full_20, scale_1) + del full_20, scale_1 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_24 = paddle._C_ops.add(matmul_58, add_23) + del matmul_58 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_7 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_59 = paddle._C_ops.matmul(dropout_58, transpose_32, False, False) + del dropout_58 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_33 = paddle._C_ops.transpose(matmul_59, [0, 2, 1, 3]) + del matmul_59 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_33, full_int_array_4) + del transpose_33 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_60 = paddle._C_ops.matmul(reshape_31, parameter_70, False, False) + del parameter_70, reshape_31 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_60, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_60 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_25 = paddle._C_ops.add(add_22, dropout_60) + del add_22, dropout_60 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_15 = paddle._C_ops.pow(add_25, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_15 = paddle._C_ops.mean(pow_15, full_int_array_1, True) + del pow_15 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_31 = paddle._C_ops.scale(mean_15, full_3, float("1e-06"), True) + del mean_15 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_15 = paddle._C_ops.rsqrt(scale_31) + del scale_31 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_31 = paddle._C_ops.multiply(add_25, rsqrt_15) + del rsqrt_15 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_32 = paddle._C_ops.multiply(parameter_66, multiply_31) + del multiply_31, parameter_66 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_61 = paddle._C_ops.matmul(multiply_32, parameter_68, False, False) + del multiply_32, parameter_68 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_6 = paddle._C_ops.relu(matmul_61) + del matmul_61 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_6, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_6 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_62 = paddle._C_ops.matmul(dropout_62, parameter_67, False, False) + del dropout_62, parameter_67 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_62, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_62 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_26 = paddle._C_ops.add(dropout_64, add_25) + del add_25, dropout_64 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_16 = paddle._C_ops.pow(add_26, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_16 = paddle._C_ops.mean(pow_16, full_int_array_1, True) + del pow_16 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_32 = paddle._C_ops.scale(mean_16, full_3, float("1e-06"), True) + del mean_16 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_16 = paddle._C_ops.rsqrt(scale_32) + del scale_32 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_33 = paddle._C_ops.multiply(add_26, rsqrt_16) + del rsqrt_16 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_34 = paddle._C_ops.multiply(parameter_61, multiply_33) + del multiply_33, parameter_61 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_63 = paddle._C_ops.matmul(multiply_34, parameter_65, False, False) + del parameter_65 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(matmul_63, full_int_array_2) + del matmul_63 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_64 = paddle._C_ops.matmul(multiply_34, parameter_64, False, False) + del parameter_64 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(matmul_64, full_int_array_2) + del matmul_64 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_35 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_65 = paddle._C_ops.matmul(multiply_34, parameter_63, False, False) + del multiply_34, parameter_63 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(matmul_65, full_int_array_2) + del matmul_65 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_66 = paddle._C_ops.matmul(transpose_34, transpose_35, False, True) + del transpose_34 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_27 = paddle._C_ops.add(matmul_66, add_20) + del matmul_66 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_8 = paddle._C_ops.softmax(add_27, -1) + del add_27 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_67 = paddle._C_ops.matmul(dropout_66, transpose_36, False, False) + del dropout_66 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_37 = paddle._C_ops.transpose(matmul_67, [0, 2, 1, 3]) + del matmul_67 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_37, full_int_array_4) + del transpose_37 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_68 = paddle._C_ops.matmul(reshape_35, parameter_62, False, False) + del parameter_62, reshape_35 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_68, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_68 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_28 = paddle._C_ops.add(add_26, dropout_68) + del add_26, dropout_68 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_17 = paddle._C_ops.pow(add_28, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_17 = paddle._C_ops.mean(pow_17, full_int_array_1, True) + del pow_17 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_33 = paddle._C_ops.scale(mean_17, full_3, float("1e-06"), True) + del mean_17 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_17 = paddle._C_ops.rsqrt(scale_33) + del scale_33 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_35 = paddle._C_ops.multiply(add_28, rsqrt_17) + del rsqrt_17 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_36 = paddle._C_ops.multiply(parameter_56, multiply_35) + del multiply_35, parameter_56 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_69 = paddle._C_ops.matmul(multiply_36, parameter_60, False, False) + del multiply_36, parameter_60 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_69, full_int_array_2) + del matmul_69 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_70 = paddle._C_ops.matmul(dropout_50, parameter_59, False, False) + del parameter_59 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_70, full_int_array_2) + del matmul_70 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_39 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_71 = paddle._C_ops.matmul(dropout_50, parameter_58, False, False) + del parameter_58 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_71, full_int_array_2) + del matmul_71 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_72 = paddle._C_ops.matmul(transpose_38, transpose_39, False, True) + del transpose_38 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_29 = paddle._C_ops.add(matmul_72, add_23) + del matmul_72 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_9 = paddle._C_ops.softmax(add_29, -1) + del add_29 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_73 = paddle._C_ops.matmul(dropout_70, transpose_40, False, False) + del dropout_70 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_41 = paddle._C_ops.transpose(matmul_73, [0, 2, 1, 3]) + del matmul_73 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_41, full_int_array_4) + del transpose_41 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_74 = paddle._C_ops.matmul(reshape_39, parameter_57, False, False) + del parameter_57, reshape_39 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_74, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_74 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_30 = paddle._C_ops.add(add_28, dropout_72) + del add_28, dropout_72 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_18 = paddle._C_ops.pow(add_30, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_18 = paddle._C_ops.mean(pow_18, full_int_array_1, True) + del pow_18 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_34 = paddle._C_ops.scale(mean_18, full_3, float("1e-06"), True) + del mean_18 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_18 = paddle._C_ops.rsqrt(scale_34) + del scale_34 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_37 = paddle._C_ops.multiply(add_30, rsqrt_18) + del rsqrt_18 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_38 = paddle._C_ops.multiply(parameter_53, multiply_37) + del multiply_37, parameter_53 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_75 = paddle._C_ops.matmul(multiply_38, parameter_55, False, False) + del multiply_38, parameter_55 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_7 = paddle._C_ops.relu(matmul_75) + del matmul_75 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_7, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_7 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_76 = paddle._C_ops.matmul(dropout_74, parameter_54, False, False) + del dropout_74, parameter_54 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_76, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_76 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_31 = paddle._C_ops.add(dropout_76, add_30) + del add_30, dropout_76 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_19 = paddle._C_ops.pow(add_31, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_19 = paddle._C_ops.mean(pow_19, full_int_array_1, True) + del pow_19 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_35 = paddle._C_ops.scale(mean_19, full_3, float("1e-06"), True) + del mean_19 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_19 = paddle._C_ops.rsqrt(scale_35) + del scale_35 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_39 = paddle._C_ops.multiply(add_31, rsqrt_19) + del rsqrt_19 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_40 = paddle._C_ops.multiply(parameter_48, multiply_39) + del multiply_39, parameter_48 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_77 = paddle._C_ops.matmul(multiply_40, parameter_52, False, False) + del parameter_52 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(matmul_77, full_int_array_2) + del matmul_77 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_78 = paddle._C_ops.matmul(multiply_40, parameter_51, False, False) + del parameter_51 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(matmul_78, full_int_array_2) + del matmul_78 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_43 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_79 = paddle._C_ops.matmul(multiply_40, parameter_50, False, False) + del multiply_40, parameter_50 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_79, full_int_array_2) + del matmul_79 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_80 = paddle._C_ops.matmul(transpose_42, transpose_43, False, True) + del transpose_42 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_32 = paddle._C_ops.add(matmul_80, add_20) + del matmul_80 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_10 = paddle._C_ops.softmax(add_32, -1) + del add_32 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_81 = paddle._C_ops.matmul(dropout_78, transpose_44, False, False) + del dropout_78 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_45 = paddle._C_ops.transpose(matmul_81, [0, 2, 1, 3]) + del matmul_81 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_45, full_int_array_4) + del transpose_45 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_82 = paddle._C_ops.matmul(reshape_43, parameter_49, False, False) + del parameter_49, reshape_43 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_82, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_82 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_33 = paddle._C_ops.add(add_31, dropout_80) + del add_31, dropout_80 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_20 = paddle._C_ops.pow(add_33, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_20 = paddle._C_ops.mean(pow_20, full_int_array_1, True) + del pow_20 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_36 = paddle._C_ops.scale(mean_20, full_3, float("1e-06"), True) + del mean_20 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_20 = paddle._C_ops.rsqrt(scale_36) + del scale_36 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_41 = paddle._C_ops.multiply(add_33, rsqrt_20) + del rsqrt_20 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_42 = paddle._C_ops.multiply(parameter_43, multiply_41) + del multiply_41, parameter_43 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_83 = paddle._C_ops.matmul(multiply_42, parameter_47, False, False) + del multiply_42, parameter_47 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_83, full_int_array_2) + del matmul_83 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_84 = paddle._C_ops.matmul(dropout_50, parameter_46, False, False) + del parameter_46 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_84, full_int_array_2) + del matmul_84 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_47 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_85 = paddle._C_ops.matmul(dropout_50, parameter_45, False, False) + del parameter_45 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(matmul_85, full_int_array_2) + del matmul_85 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_48 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_86 = paddle._C_ops.matmul(transpose_46, transpose_47, False, True) + del transpose_46 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_34 = paddle._C_ops.add(matmul_86, add_23) + del matmul_86 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_11 = paddle._C_ops.softmax(add_34, -1) + del add_34 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_87 = paddle._C_ops.matmul(dropout_82, transpose_48, False, False) + del dropout_82 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_49 = paddle._C_ops.transpose(matmul_87, [0, 2, 1, 3]) + del matmul_87 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_49, full_int_array_4) + del transpose_49 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_88 = paddle._C_ops.matmul(reshape_47, parameter_44, False, False) + del parameter_44, reshape_47 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_88, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_88 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_35 = paddle._C_ops.add(add_33, dropout_84) + del add_33, dropout_84 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_21 = paddle._C_ops.pow(add_35, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_21 = paddle._C_ops.mean(pow_21, full_int_array_1, True) + del pow_21 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_37 = paddle._C_ops.scale(mean_21, full_3, float("1e-06"), True) + del mean_21 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_21 = paddle._C_ops.rsqrt(scale_37) + del scale_37 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_43 = paddle._C_ops.multiply(add_35, rsqrt_21) + del rsqrt_21 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_44 = paddle._C_ops.multiply(parameter_40, multiply_43) + del multiply_43, parameter_40 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_89 = paddle._C_ops.matmul(multiply_44, parameter_42, False, False) + del multiply_44, parameter_42 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_8 = paddle._C_ops.relu(matmul_89) + del matmul_89 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_8, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_8 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_90 = paddle._C_ops.matmul(dropout_86, parameter_41, False, False) + del dropout_86, parameter_41 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_90, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_90 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_36 = paddle._C_ops.add(dropout_88, add_35) + del add_35, dropout_88 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_22 = paddle._C_ops.pow(add_36, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_22 = paddle._C_ops.mean(pow_22, full_int_array_1, True) + del pow_22 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_38 = paddle._C_ops.scale(mean_22, full_3, float("1e-06"), True) + del mean_22 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_22 = paddle._C_ops.rsqrt(scale_38) + del scale_38 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_45 = paddle._C_ops.multiply(add_36, rsqrt_22) + del rsqrt_22 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_46 = paddle._C_ops.multiply(parameter_35, multiply_45) + del multiply_45, parameter_35 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_91 = paddle._C_ops.matmul(multiply_46, parameter_39, False, False) + del parameter_39 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(matmul_91, full_int_array_2) + del matmul_91 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_50 = paddle._C_ops.transpose(reshape_48, [0, 2, 1, 3]) + del reshape_48 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_92 = paddle._C_ops.matmul(multiply_46, parameter_38, False, False) + del parameter_38 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(matmul_92, full_int_array_2) + del matmul_92 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_51 = paddle._C_ops.transpose(reshape_49, [0, 2, 1, 3]) + del reshape_49 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_93 = paddle._C_ops.matmul(multiply_46, parameter_37, False, False) + del multiply_46, parameter_37 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(matmul_93, full_int_array_2) + del matmul_93 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_52 = paddle._C_ops.transpose(reshape_50, [0, 2, 1, 3]) + del reshape_50 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_94 = paddle._C_ops.matmul(transpose_50, transpose_51, False, True) + del transpose_50 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_37 = paddle._C_ops.add(matmul_94, add_20) + del matmul_94 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_12 = paddle._C_ops.softmax(add_37, -1) + del add_37 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_12, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_12 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_95 = paddle._C_ops.matmul(dropout_90, transpose_52, False, False) + del dropout_90 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_53 = paddle._C_ops.transpose(matmul_95, [0, 2, 1, 3]) + del matmul_95 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_51 = paddle._C_ops.reshape(transpose_53, full_int_array_4) + del transpose_53 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_96 = paddle._C_ops.matmul(reshape_51, parameter_36, False, False) + del parameter_36, reshape_51 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_96, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_96 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_38 = paddle._C_ops.add(add_36, dropout_92) + del add_36, dropout_92 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_23 = paddle._C_ops.pow(add_38, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_23 = paddle._C_ops.mean(pow_23, full_int_array_1, True) + del pow_23 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_39 = paddle._C_ops.scale(mean_23, full_3, float("1e-06"), True) + del mean_23 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_23 = paddle._C_ops.rsqrt(scale_39) + del scale_39 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_47 = paddle._C_ops.multiply(add_38, rsqrt_23) + del rsqrt_23 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_48 = paddle._C_ops.multiply(parameter_30, multiply_47) + del multiply_47, parameter_30 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_97 = paddle._C_ops.matmul(multiply_48, parameter_34, False, False) + del multiply_48, parameter_34 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(matmul_97, full_int_array_2) + del matmul_97 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_54 = paddle._C_ops.transpose(reshape_52, [0, 2, 1, 3]) + del reshape_52 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_98 = paddle._C_ops.matmul(dropout_50, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(matmul_98, full_int_array_2) + del matmul_98 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_55 = paddle._C_ops.transpose(reshape_53, [0, 2, 1, 3]) + del reshape_53 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_99 = paddle._C_ops.matmul(dropout_50, parameter_32, False, False) + del parameter_32 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(matmul_99, full_int_array_2) + del matmul_99 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_56 = paddle._C_ops.transpose(reshape_54, [0, 2, 1, 3]) + del reshape_54 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_100 = paddle._C_ops.matmul(transpose_54, transpose_55, False, True) + del transpose_54 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_39 = paddle._C_ops.add(matmul_100, add_23) + del matmul_100 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_13 = paddle._C_ops.softmax(add_39, -1) + del add_39 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_13, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_13 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_101 = paddle._C_ops.matmul(dropout_94, transpose_56, False, False) + del dropout_94 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_57 = paddle._C_ops.transpose(matmul_101, [0, 2, 1, 3]) + del matmul_101 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(transpose_57, full_int_array_4) + del transpose_57 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_102 = paddle._C_ops.matmul(reshape_55, parameter_31, False, False) + del parameter_31, reshape_55 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_102, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_102 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_40 = paddle._C_ops.add(add_38, dropout_96) + del add_38, dropout_96 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_24 = paddle._C_ops.pow(add_40, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_24 = paddle._C_ops.mean(pow_24, full_int_array_1, True) + del pow_24 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_40 = paddle._C_ops.scale(mean_24, full_3, float("1e-06"), True) + del mean_24 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_24 = paddle._C_ops.rsqrt(scale_40) + del scale_40 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_49 = paddle._C_ops.multiply(add_40, rsqrt_24) + del rsqrt_24 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_50 = paddle._C_ops.multiply(parameter_27, multiply_49) + del multiply_49, parameter_27 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_103 = paddle._C_ops.matmul(multiply_50, parameter_29, False, False) + del multiply_50, parameter_29 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_9 = paddle._C_ops.relu(matmul_103) + del matmul_103 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_9, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_9 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_104 = paddle._C_ops.matmul(dropout_98, parameter_28, False, False) + del dropout_98, parameter_28 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_104, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_104 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_41 = paddle._C_ops.add(dropout_100, add_40) + del add_40, dropout_100 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_25 = paddle._C_ops.pow(add_41, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_25 = paddle._C_ops.mean(pow_25, full_int_array_1, True) + del pow_25 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_41 = paddle._C_ops.scale(mean_25, full_3, float("1e-06"), True) + del mean_25 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_25 = paddle._C_ops.rsqrt(scale_41) + del scale_41 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_51 = paddle._C_ops.multiply(add_41, rsqrt_25) + del rsqrt_25 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_52 = paddle._C_ops.multiply(parameter_22, multiply_51) + del multiply_51, parameter_22 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_105 = paddle._C_ops.matmul(multiply_52, parameter_26, False, False) + del parameter_26 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(matmul_105, full_int_array_2) + del matmul_105 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_58 = paddle._C_ops.transpose(reshape_56, [0, 2, 1, 3]) + del reshape_56 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_106 = paddle._C_ops.matmul(multiply_52, parameter_25, False, False) + del parameter_25 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(matmul_106, full_int_array_2) + del matmul_106 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_59 = paddle._C_ops.transpose(reshape_57, [0, 2, 1, 3]) + del reshape_57 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_107 = paddle._C_ops.matmul(multiply_52, parameter_24, False, False) + del multiply_52, parameter_24 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(matmul_107, full_int_array_2) + del matmul_107 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_60 = paddle._C_ops.transpose(reshape_58, [0, 2, 1, 3]) + del reshape_58 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_108 = paddle._C_ops.matmul(transpose_58, transpose_59, False, True) + del transpose_58 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_42 = paddle._C_ops.add(matmul_108, add_20) + del matmul_108 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_14 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_102, dropout_103 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_14, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_14 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_109 = paddle._C_ops.matmul(dropout_102, transpose_60, False, False) + del dropout_102 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_61 = paddle._C_ops.transpose(matmul_109, [0, 2, 1, 3]) + del matmul_109 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_59 = paddle._C_ops.reshape(transpose_61, full_int_array_4) + del transpose_61 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_110 = paddle._C_ops.matmul(reshape_59, parameter_23, False, False) + del parameter_23, reshape_59 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_104, dropout_105 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_110, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_110 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_43 = paddle._C_ops.add(add_41, dropout_104) + del add_41, dropout_104 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_26 = paddle._C_ops.pow(add_43, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_26 = paddle._C_ops.mean(pow_26, full_int_array_1, True) + del pow_26 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_42 = paddle._C_ops.scale(mean_26, full_3, float("1e-06"), True) + del mean_26 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_26 = paddle._C_ops.rsqrt(scale_42) + del scale_42 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_53 = paddle._C_ops.multiply(add_43, rsqrt_26) + del rsqrt_26 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_54 = paddle._C_ops.multiply(parameter_17, multiply_53) + del multiply_53, parameter_17 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_111 = paddle._C_ops.matmul(multiply_54, parameter_21, False, False) + del multiply_54, parameter_21 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(matmul_111, full_int_array_2) + del matmul_111 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_62 = paddle._C_ops.transpose(reshape_60, [0, 2, 1, 3]) + del reshape_60 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_112 = paddle._C_ops.matmul(dropout_50, parameter_20, False, False) + del parameter_20 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(matmul_112, full_int_array_2) + del matmul_112 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_63 = paddle._C_ops.transpose(reshape_61, [0, 2, 1, 3]) + del reshape_61 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_113 = paddle._C_ops.matmul(dropout_50, parameter_19, False, False) + del parameter_19 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(matmul_113, full_int_array_2) + del matmul_113 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_64 = paddle._C_ops.transpose(reshape_62, [0, 2, 1, 3]) + del reshape_62 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_114 = paddle._C_ops.matmul(transpose_62, transpose_63, False, True) + del transpose_62 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_44 = paddle._C_ops.add(matmul_114, add_23) + del matmul_114 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_15 = paddle._C_ops.softmax(add_44, -1) + del add_44 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_106, dropout_107 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_15, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_15 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_115 = paddle._C_ops.matmul(dropout_106, transpose_64, False, False) + del dropout_106 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_65 = paddle._C_ops.transpose(matmul_115, [0, 2, 1, 3]) + del matmul_115 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(transpose_65, full_int_array_4) + del transpose_65 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_116 = paddle._C_ops.matmul(reshape_63, parameter_18, False, False) + del parameter_18, reshape_63 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_108, dropout_109 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_116, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_116 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_45 = paddle._C_ops.add(add_43, dropout_108) + del add_43, dropout_108 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_27 = paddle._C_ops.pow(add_45, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_27 = paddle._C_ops.mean(pow_27, full_int_array_1, True) + del pow_27 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_43 = paddle._C_ops.scale(mean_27, full_3, float("1e-06"), True) + del mean_27 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_27 = paddle._C_ops.rsqrt(scale_43) + del scale_43 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_55 = paddle._C_ops.multiply(add_45, rsqrt_27) + del rsqrt_27 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_56 = paddle._C_ops.multiply(parameter_14, multiply_55) + del multiply_55, parameter_14 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_117 = paddle._C_ops.matmul(multiply_56, parameter_16, False, False) + del multiply_56, parameter_16 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_10 = paddle._C_ops.relu(matmul_117) + del matmul_117 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_110, dropout_111 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_10, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_10 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_118 = paddle._C_ops.matmul(dropout_110, parameter_15, False, False) + del dropout_110, parameter_15 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_112, dropout_113 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_118, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_118 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_46 = paddle._C_ops.add(dropout_112, add_45) + del add_45, dropout_112 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_28 = paddle._C_ops.pow(add_46, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_28 = paddle._C_ops.mean(pow_28, full_int_array_1, True) + del pow_28 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_44 = paddle._C_ops.scale(mean_28, full_3, float("1e-06"), True) + del mean_28 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_28 = paddle._C_ops.rsqrt(scale_44) + del scale_44 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_57 = paddle._C_ops.multiply(add_46, rsqrt_28) + del rsqrt_28 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_58 = paddle._C_ops.multiply(parameter_9, multiply_57) + del multiply_57, parameter_9 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_119 = paddle._C_ops.matmul(multiply_58, parameter_13, False, False) + del parameter_13 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(matmul_119, full_int_array_2) + del matmul_119 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_66 = paddle._C_ops.transpose(reshape_64, [0, 2, 1, 3]) + del reshape_64 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_120 = paddle._C_ops.matmul(multiply_58, parameter_12, False, False) + del parameter_12 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(matmul_120, full_int_array_2) + del matmul_120 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_67 = paddle._C_ops.transpose(reshape_65, [0, 2, 1, 3]) + del reshape_65 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_121 = paddle._C_ops.matmul(multiply_58, parameter_11, False, False) + del multiply_58, parameter_11 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(matmul_121, full_int_array_2) + del matmul_121 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_68 = paddle._C_ops.transpose(reshape_66, [0, 2, 1, 3]) + del reshape_66 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_122 = paddle._C_ops.matmul(transpose_66, transpose_67, False, True) + del transpose_66 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_47 = paddle._C_ops.add(matmul_122, add_20) + del add_20, matmul_122 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_16 = paddle._C_ops.softmax(add_47, -1) + del add_47 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_114, dropout_115 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_16, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_16 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_123 = paddle._C_ops.matmul(dropout_114, transpose_68, False, False) + del dropout_114 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_69 = paddle._C_ops.transpose(matmul_123, [0, 2, 1, 3]) + del matmul_123 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_67 = paddle._C_ops.reshape(transpose_69, full_int_array_4) + del transpose_69 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_124 = paddle._C_ops.matmul(reshape_67, parameter_10, False, False) + del parameter_10, reshape_67 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_116, dropout_117 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_124, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_124 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_48 = paddle._C_ops.add(add_46, dropout_116) + del add_46, dropout_116 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_29 = paddle._C_ops.pow(add_48, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_29 = paddle._C_ops.mean(pow_29, full_int_array_1, True) + del pow_29 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_45 = paddle._C_ops.scale(mean_29, full_3, float("1e-06"), True) + del mean_29 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_29 = paddle._C_ops.rsqrt(scale_45) + del scale_45 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_59 = paddle._C_ops.multiply(add_48, rsqrt_29) + del rsqrt_29 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_60 = paddle._C_ops.multiply(parameter_4, multiply_59) + del multiply_59, parameter_4 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_125 = paddle._C_ops.matmul(multiply_60, parameter_8, False, False) + del multiply_60, parameter_8 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(matmul_125, full_int_array_2) + del matmul_125 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_70 = paddle._C_ops.transpose(reshape_68, [0, 2, 1, 3]) + del reshape_68 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_126 = paddle._C_ops.matmul(dropout_50, parameter_7, False, False) + del parameter_7 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_69 = paddle._C_ops.reshape(matmul_126, full_int_array_2) + del matmul_126 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_71 = paddle._C_ops.transpose(reshape_69, [0, 2, 1, 3]) + del reshape_69 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_127 = paddle._C_ops.matmul(dropout_50, parameter_6, False, False) + del parameter_6 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(matmul_127, full_int_array_2) + del full_int_array_2, matmul_127 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_72 = paddle._C_ops.transpose(reshape_70, [0, 2, 1, 3]) + del reshape_70 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_128 = paddle._C_ops.matmul(transpose_70, transpose_71, False, True) + del transpose_70 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_49 = paddle._C_ops.add(matmul_128, add_23) + del add_23, matmul_128 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_17 = paddle._C_ops.softmax(add_49, -1) + del add_49 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_118, dropout_119 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_17, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_17 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_129 = paddle._C_ops.matmul(dropout_118, transpose_72, False, False) + del dropout_118 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_73 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(transpose_73, full_int_array_4) + del full_int_array_4, transpose_73 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_130 = paddle._C_ops.matmul(reshape_71, parameter_5, False, False) + del parameter_5, reshape_71 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_120, dropout_121 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_130, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_130 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_50 = paddle._C_ops.add(add_48, dropout_120) + del add_48, dropout_120 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_30 = paddle._C_ops.pow(add_50, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_30 = paddle._C_ops.mean(pow_30, full_int_array_1, True) + del pow_30 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_46 = paddle._C_ops.scale(mean_30, full_3, float("1e-06"), True) + del mean_30 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_30 = paddle._C_ops.rsqrt(scale_46) + del scale_46 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_61 = paddle._C_ops.multiply(add_50, rsqrt_30) + del rsqrt_30 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_62 = paddle._C_ops.multiply(parameter_1, multiply_61) + del multiply_61, parameter_1 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_131 = paddle._C_ops.matmul(multiply_62, parameter_3, False, False) + del multiply_62, parameter_3 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_11 = paddle._C_ops.relu(matmul_131) + del matmul_131 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_122, dropout_123 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_11, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_11 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_132 = paddle._C_ops.matmul(dropout_122, parameter_2, False, False) + del dropout_122, parameter_2 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_124, dropout_125 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_132, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_132 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_51 = paddle._C_ops.add(dropout_124, add_50) + del add_50, dropout_124 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_31 = paddle._C_ops.pow(add_51, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_31 = paddle._C_ops.mean(pow_31, full_int_array_1, True) + del full_int_array_1, pow_31 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_47 = paddle._C_ops.scale(mean_31, full_3, float("1e-06"), True) + del full_3, mean_31 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_31 = paddle._C_ops.rsqrt(scale_47) + del scale_47 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_63 = paddle._C_ops.multiply(add_51, rsqrt_31) + del add_51, rsqrt_31 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_64 = paddle._C_ops.multiply(parameter_0, multiply_63) + del multiply_63, parameter_0 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_126, dropout_127 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + multiply_64, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_2, multiply_64 + + # pd_op.full: (1xf32) <- () + full_21 = paddle._C_ops.full( + [1], float("0.0441942"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x512xf32) <- (1x1x512xf32, 1xf32) + scale_48 = paddle._C_ops.scale(dropout_126, full_21, float("0"), True) + del dropout_126, full_21 + + # pd_op.matmul: (1x1x32128xf32) <- (1x1x512xf32, 32128x512xf32) + matmul_0 = paddle._C_ops.matmul(scale_48, parameter_130, False, True) + del ( + dropout_50, + parameter_130, + scale_48, + transpose_26, + transpose_27, + transpose_31, + transpose_32, + transpose_35, + transpose_36, + transpose_39, + transpose_40, + transpose_43, + transpose_44, + transpose_47, + transpose_48, + transpose_51, + transpose_52, + transpose_55, + transpose_56, + transpose_59, + transpose_60, + transpose_63, + transpose_64, + transpose_67, + transpose_68, + transpose_71, + transpose_72, + ) + + return matmul_0 diff --git a/paddle_samples/PaddleNLP/t5-small/weight_meta.py b/paddle_samples/PaddleNLP/t5-small/weight_meta.py new file mode 100644 index 000000000..a5a523c26 --- /dev/null +++ b/paddle_samples/PaddleNLP/t5-small/weight_meta.py @@ -0,0 +1,1439 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [512] + dtype = "float32" + min_val = float("-0.000406265") + max_val = float("9.5625") + mean = float("0.238993") + std = float("0.528551") + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [512] + dtype = "float32" + min_val = float("-1.03125") + max_val = float("12.125") + mean = float("3.0563") + std = float("1.25819") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [2048, 512] + dtype = "float32" + min_val = float("-45.25") + max_val = float("24.5") + mean = float("0.000320316") + std = float("0.469186") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [512, 2048] + dtype = "float32" + min_val = float("-13.6875") + max_val = float("10.25") + mean = float("0.00696439") + std = float("0.82078") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [512] + dtype = "float32" + min_val = float("-0.164062") + max_val = float("1.52344") + mean = float("0.142325") + std = float("0.0974383") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [512, 512] + dtype = "float32" + min_val = float("-20.5") + max_val = float("19.25") + mean = float("-0.00232446") + std = float("1.47252") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [512, 512] + dtype = "float32" + min_val = float("-12.3125") + max_val = float("10.8125") + mean = float("-0.00313481") + std = float("1.79793") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.03125") + max_val = float("4.71875") + mean = float("0.000654718") + std = float("0.387374") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.233398") + max_val = float("0.21875") + mean = float("-3.2276e-05") + std = float("0.0427395") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [512] + dtype = "float32" + min_val = float("-0.341797") + max_val = float("0.789062") + mean = float("0.221493") + std = float("0.079524") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [512, 512] + dtype = "float32" + min_val = float("-42.75") + max_val = float("45.75") + mean = float("-0.00122536") + std = float("1.53928") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.53125") + max_val = float("5.96875") + mean = float("0.00298593") + std = float("0.953516") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.5") + max_val = float("2.32812") + mean = float("0.000342449") + std = float("0.394908") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.261719") + max_val = float("0.285156") + mean = float("0.000135268") + std = float("0.0427585") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [512] + dtype = "float32" + min_val = float("0.375") + max_val = float("7.65625") + mean = float("2.4388") + std = float("0.69339") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [2048, 512] + dtype = "float32" + min_val = float("-16.25") + max_val = float("10.5") + mean = float("-0.000124255") + std = float("0.419801") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [512, 2048] + dtype = "float32" + min_val = float("-10.625") + max_val = float("10.5") + mean = float("0.0074399") + std = float("0.694377") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [512] + dtype = "float32" + min_val = float("-0.0810547") + max_val = float("0.464844") + mean = float("0.0716004") + std = float("0.0325611") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [512, 512] + dtype = "float32" + min_val = float("-25.5") + max_val = float("21.0") + mean = float("0.000646527") + std = float("1.26897") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [512, 512] + dtype = "float32" + min_val = float("-8.25") + max_val = float("7.75") + mean = float("-0.00116448") + std = float("1.22014") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.65625") + max_val = float("6.34375") + mean = float("0.000291407") + std = float("0.478891") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.255859") + max_val = float("0.271484") + mean = float("-0.000160606") + std = float("0.0556473") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [512] + dtype = "float32" + min_val = float("-0.0688477") + max_val = float("0.53125") + mean = float("0.1733") + std = float("0.0362951") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [512, 512] + dtype = "float32" + min_val = float("-48.5") + max_val = float("37.5") + mean = float("-0.00320207") + std = float("2.31977") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.875") + max_val = float("4.40625") + mean = float("0.0014279") + std = float("0.891554") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.70312") + max_val = float("2.4375") + mean = float("-0.000709436") + std = float("0.410992") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.306641") + max_val = float("0.324219") + mean = float("-1.23639e-05") + std = float("0.0434743") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [512] + dtype = "float32" + min_val = float("0.332031") + max_val = float("4.71875") + mean = float("1.9234") + std = float("0.349066") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [2048, 512] + dtype = "float32" + min_val = float("-10.0625") + max_val = float("10.6875") + mean = float("-0.00030282") + std = float("0.399087") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [512, 2048] + dtype = "float32" + min_val = float("-8.6875") + max_val = float("8.5625") + mean = float("0.0142616") + std = float("0.680091") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [512] + dtype = "float32" + min_val = float("-0.0791016") + max_val = float("0.435547") + mean = float("0.119905") + std = float("0.0375838") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [512, 512] + dtype = "float32" + min_val = float("-10.0625") + max_val = float("13.5625") + mean = float("9.86211e-05") + std = float("0.894709") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.0625") + max_val = float("6.59375") + mean = float("-0.00323875") + std = float("0.92144") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.65625") + max_val = float("4.5625") + mean = float("-0.00133923") + std = float("0.410349") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.279297") + max_val = float("0.263672") + mean = float("-2.56622e-05") + std = float("0.0439826") + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [512] + dtype = "float32" + min_val = float("-0.103516") + max_val = float("0.378906") + mean = float("0.154994") + std = float("0.0354193") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [512, 512] + dtype = "float32" + min_val = float("-21.75") + max_val = float("22.875") + mean = float("0.00349601") + std = float("1.43415") + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.0") + max_val = float("4.90625") + mean = float("-0.00221994") + std = float("0.958329") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.17188") + max_val = float("3.07812") + mean = float("-0.000142329") + std = float("0.418875") + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.339844") + max_val = float("0.439453") + mean = float("3.85278e-05") + std = float("0.0463164") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [512] + dtype = "float32" + min_val = float("0.310547") + max_val = float("3.09375") + mean = float("1.45117") + std = float("0.241399") + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [2048, 512] + dtype = "float32" + min_val = float("-14.1875") + max_val = float("9.0625") + mean = float("-0.000422012") + std = float("0.38754") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [512, 2048] + dtype = "float32" + min_val = float("-21.375") + max_val = float("18.0") + mean = float("0.019711") + std = float("0.736918") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [512] + dtype = "float32" + min_val = float("-0.0698242") + max_val = float("0.257812") + mean = float("0.0878949") + std = float("0.020732") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [512, 512] + dtype = "float32" + min_val = float("-9.3125") + max_val = float("9.375") + mean = float("-0.00120116") + std = float("0.733678") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.75") + max_val = float("3.73438") + mean = float("-0.000331075") + std = float("0.73921") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.8125") + max_val = float("5.46875") + mean = float("0.000311156") + std = float("0.392459") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.365234") + max_val = float("0.460938") + mean = float("-8.85896e-05") + std = float("0.0677922") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [512] + dtype = "float32" + min_val = float("-0.0869141") + max_val = float("0.304688") + mean = float("0.14316") + std = float("0.0355998") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [512, 512] + dtype = "float32" + min_val = float("-19.125") + max_val = float("23.375") + mean = float("0.000395767") + std = float("1.07214") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.40625") + max_val = float("4.71875") + mean = float("-0.00047506") + std = float("0.908469") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.25") + max_val = float("3.0625") + mean = float("-0.000608685") + std = float("0.406653") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.410156") + max_val = float("0.5") + mean = float("0.000177197") + std = float("0.0492003") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [512] + dtype = "float32" + min_val = float("0.304688") + max_val = float("2.26562") + mean = float("1.15984") + std = float("0.228704") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [2048, 512] + dtype = "float32" + min_val = float("-15.8125") + max_val = float("14.625") + mean = float("-0.00204277") + std = float("0.358594") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [512, 2048] + dtype = "float32" + min_val = float("-18.25") + max_val = float("25.25") + mean = float("0.0252502") + std = float("0.713721") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [512] + dtype = "float32" + min_val = float("-0.0786133") + max_val = float("0.198242") + mean = float("0.0967789") + std = float("0.0251307") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [512, 512] + dtype = "float32" + min_val = float("-7.5") + max_val = float("6.96875") + mean = float("-0.000955252") + std = float("0.651063") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.1875") + max_val = float("3.53125") + mean = float("-0.000889366") + std = float("0.636495") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.125") + max_val = float("4.84375") + mean = float("0.00154598") + std = float("0.351185") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.4375") + max_val = float("0.419922") + mean = float("-9.81584e-05") + std = float("0.0735168") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [512] + dtype = "float32" + min_val = float("0.0354004") + max_val = float("0.298828") + mean = float("0.131269") + std = float("0.0354128") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [512, 512] + dtype = "float32" + min_val = float("-17.0") + max_val = float("14.6875") + mean = float("0.00124212") + std = float("0.91165") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.98438") + max_val = float("3.8125") + mean = float("0.000928635") + std = float("0.724313") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.625") + max_val = float("2.48438") + mean = float("0.00132067") + std = float("0.429071") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.328125") + max_val = float("0.337891") + mean = float("-1.0113e-05") + std = float("0.0557238") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [512] + dtype = "float32" + min_val = float("0.113281") + max_val = float("5.25") + mean = float("0.67453") + std = float("0.326679") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [2048, 512] + dtype = "float32" + min_val = float("-10.1875") + max_val = float("7.375") + mean = float("-0.00203279") + std = float("0.335252") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [512, 2048] + dtype = "float32" + min_val = float("-15.75") + max_val = float("14.5625") + mean = float("0.014664") + std = float("0.619637") + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [512] + dtype = "float32" + min_val = float("0.022583") + max_val = float("0.71875") + mean = float("0.0890827") + std = float("0.0493023") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [512, 512] + dtype = "float32" + min_val = float("-12.125") + max_val = float("14.0625") + mean = float("-0.000343404") + std = float("0.716759") + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.5625") + max_val = float("2.28125") + mean = float("0.000567753") + std = float("0.469188") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.03125") + max_val = float("2.92188") + mean = float("-0.000232129") + std = float("0.392837") + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.34375") + max_val = float("0.306641") + mean = float("-0.000139535") + std = float("0.0599122") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [512] + dtype = "float32" + min_val = float("0.0385742") + max_val = float("0.476562") + mean = float("0.0893378") + std = float("0.0319224") + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [32, 8] + dtype = "float32" + min_val = float("-34.5") + max_val = float("48.0") + mean = float("-1.32047") + std = float("7.05675") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [512, 512] + dtype = "float32" + min_val = float("-12.5625") + max_val = float("13.75") + mean = float("0.000351022") + std = float("0.857384") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.45312") + max_val = float("2.39062") + mean = float("0.000828562") + std = float("0.489659") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.98438") + max_val = float("3.67188") + mean = float("0.00128421") + std = float("0.559587") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.511719") + max_val = float("0.53125") + mean = float("-2.00758e-05") + std = float("0.0698939") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [512] + dtype = "float32" + min_val = float("0.0319824") + max_val = float("0.355469") + mean = float("0.241696") + std = float("0.0792719") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [512] + dtype = "float32" + min_val = float("-0.239258") + max_val = float("3.54688") + mean = float("0.612829") + std = float("0.2203") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [2048, 512] + dtype = "float32" + min_val = float("-12.5") + max_val = float("11.1875") + mean = float("0.00199515") + std = float("0.555518") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [512, 2048] + dtype = "float32" + min_val = float("-68.5") + max_val = float("51.75") + mean = float("-0.00369408") + std = float("0.930026") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [512] + dtype = "float32" + min_val = float("0.0234375") + max_val = float("0.15625") + mean = float("0.119119") + std = float("0.0214491") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [512, 512] + dtype = "float32" + min_val = float("-20.875") + max_val = float("22.875") + mean = float("0.00219397") + std = float("1.72905") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.0625") + max_val = float("5.25") + mean = float("0.0022758") + std = float("0.896258") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.4375") + max_val = float("3.79688") + mean = float("-0.00106126") + std = float("0.436514") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.482422") + max_val = float("0.554688") + mean = float("-1.9731e-05") + std = float("0.0540952") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [512] + dtype = "float32" + min_val = float("0.246094") + max_val = float("4.75") + mean = float("0.756424") + std = float("0.255841") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [2048, 512] + dtype = "float32" + min_val = float("-13.4375") + max_val = float("16.75") + mean = float("0.00097069") + std = float("0.486232") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [512, 2048] + dtype = "float32" + min_val = float("-27.125") + max_val = float("30.25") + mean = float("-0.00128162") + std = float("0.900071") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [512] + dtype = "float32" + min_val = float("0.0275879") + max_val = float("0.177734") + mean = float("0.117132") + std = float("0.0188376") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [512, 512] + dtype = "float32" + min_val = float("-18.625") + max_val = float("14.6875") + mean = float("0.00229923") + std = float("1.33635") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.1875") + max_val = float("4.0625") + mean = float("0.000259673") + std = float("0.769611") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.375") + max_val = float("4.375") + mean = float("-0.0004771") + std = float("0.450516") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.558594") + max_val = float("0.648438") + mean = float("-0.000177306") + std = float("0.054454") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [512] + dtype = "float32" + min_val = float("0.226562") + max_val = float("4.09375") + mean = float("0.753399") + std = float("0.21234") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [2048, 512] + dtype = "float32" + min_val = float("-14.5625") + max_val = float("14.75") + mean = float("0.00074136") + std = float("0.439012") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [512, 2048] + dtype = "float32" + min_val = float("-15.3125") + max_val = float("18.875") + mean = float("0.00268636") + std = float("0.815878") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [512] + dtype = "float32" + min_val = float("-0.032959") + max_val = float("0.201172") + mean = float("0.11948") + std = float("0.0203831") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [512, 512] + dtype = "float32" + min_val = float("-16.375") + max_val = float("13.8125") + mean = float("0.000326929") + std = float("1.02065") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.45312") + max_val = float("3.75") + mean = float("0.00176018") + std = float("0.680957") + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.92188") + max_val = float("2.78125") + mean = float("-6.96151e-05") + std = float("0.44855") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.390625") + max_val = float("0.375") + mean = float("-2.9371e-05") + std = float("0.0547455") + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [512] + dtype = "float32" + min_val = float("0.203125") + max_val = float("2.5") + mean = float("0.613131") + std = float("0.150112") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [2048, 512] + dtype = "float32" + min_val = float("-11.5625") + max_val = float("7.3125") + mean = float("0.000362642") + std = float("0.406834") + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [512, 2048] + dtype = "float32" + min_val = float("-25.0") + max_val = float("34.5") + mean = float("0.00201512") + std = float("0.786157") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [512] + dtype = "float32" + min_val = float("0.0332031") + max_val = float("0.298828") + mean = float("0.136475") + std = float("0.0285545") + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [512, 512] + dtype = "float32" + min_val = float("-9.8125") + max_val = float("11.125") + mean = float("0.00299521") + std = float("0.807871") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.125") + max_val = float("5.1875") + mean = float("-0.000725675") + std = float("0.625774") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.76562") + max_val = float("2.57812") + mean = float("0.000150539") + std = float("0.378036") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.402344") + max_val = float("0.375") + mean = float("5.61201e-05") + std = float("0.0449669") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [512] + dtype = "float32" + min_val = float("0.137695") + max_val = float("1.57031") + mean = float("0.405777") + std = float("0.124192") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [2048, 512] + dtype = "float32" + min_val = float("-9.5") + max_val = float("15.0") + mean = float("-2.43034e-05") + std = float("0.360743") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [512, 2048] + dtype = "float32" + min_val = float("-16.75") + max_val = float("19.625") + mean = float("0.000799844") + std = float("0.758569") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [512] + dtype = "float32" + min_val = float("0.0291748") + max_val = float("0.361328") + mean = float("0.102077") + std = float("0.0278308") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [512, 512] + dtype = "float32" + min_val = float("-12.875") + max_val = float("12.875") + mean = float("0.00135863") + std = float("0.811788") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.5") + max_val = float("3.28125") + mean = float("-3.46814e-05") + std = float("0.590862") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.17188") + max_val = float("2.71875") + mean = float("-0.000842045") + std = float("0.46493") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.392578") + max_val = float("0.492188") + mean = float("-3.10072e-05") + std = float("0.0599718") + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [512] + dtype = "float32" + min_val = float("0.0737305") + max_val = float("3.6875") + mean = float("0.30554") + std = float("0.221261") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [2048, 512] + dtype = "float32" + min_val = float("-8.1875") + max_val = float("8.9375") + mean = float("0.000126899") + std = float("0.305008") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [512, 2048] + dtype = "float32" + min_val = float("-16.625") + max_val = float("14.5") + mean = float("-0.000737352") + std = float("0.615552") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [512] + dtype = "float32" + min_val = float("0.0388184") + max_val = float("0.380859") + mean = float("0.0923548") + std = float("0.0335667") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [32, 8] + dtype = "float32" + min_val = float("-10.8125") + max_val = float("6.125") + mean = float("-0.748865") + std = float("2.72737") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [512, 512] + dtype = "float32" + min_val = float("-9.6875") + max_val = float("12.5625") + mean = float("-0.00112105") + std = float("0.637433") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.875") + max_val = float("2.32812") + mean = float("0.000568047") + std = float("0.449698") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.5") + max_val = float("4.0") + mean = float("-6.72496e-06") + std = float("0.4878") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.376953") + max_val = float("0.439453") + mean = float("0.000162044") + std = float("0.0597395") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [32128, 512] + dtype = "float32" + min_val = float("-792.0") + max_val = float("348.0") + mean = float("0.122392") + std = float("23.1937") + data = None From e7151531c22a51da5bbb463a3a887dc423324480 Mon Sep 17 00:00:00 2001 From: RbRe145 Date: Fri, 26 Sep 2025 02:55:01 +0000 Subject: [PATCH 4/4] fix nlp_getter f format --- graph_net/test/nlp_model_getter.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/graph_net/test/nlp_model_getter.py b/graph_net/test/nlp_model_getter.py index d795f7e30..151863137 100644 --- a/graph_net/test/nlp_model_getter.py +++ b/graph_net/test/nlp_model_getter.py @@ -229,7 +229,6 @@ def get_albert_model_and_inputs(model_name, text, dtype): if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.unk_token - # 4) 构造输入(支持 str 或 List[str]) enc = tokenizer( text, return_tensors="pd", @@ -238,7 +237,6 @@ def get_albert_model_and_inputs(model_name, text, dtype): max_length=512, ) - # 显式补 attention_mask(pad 处为 0) if "attention_mask" not in enc: input_ids = enc["input_ids"] enc["attention_mask"] = (input_ids != tokenizer.pad_token_id).astype("int64")