def get_t5_model_and_inputs(model_name, text, dtype):
    """Load a T5 conditional-generation model and build a forward-pass input dict.

    Args:
        model_name: PaddleNLP pretrained model name (e.g. "t5-small").
        text: a single string or a list of strings to encode.
        dtype: "float32" to keep the default weights, or a reduced-precision
            dtype string such as "float16" to cast the model.

    Returns:
        (model, inputs) where ``inputs`` contains ``input_ids``,
        ``attention_mask`` and ``decoder_input_ids``.
    """
    import paddle
    from paddlenlp.transformers import T5ForConditionalGeneration, T5Tokenizer

    # 1) Build the tokenizer first so pad/eos ids are available below.
    tokenizer = T5Tokenizer.from_pretrained(model_name)

    # 2) Encode the input text (supports a single sample or a batch).
    enc = tokenizer(
        text,
        return_tensors="pd",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Ensure attention_mask exists (0 at padding positions, 1 elsewhere).
    if "attention_mask" not in enc:
        input_ids = enc["input_ids"]
        attn_mask = (input_ids != tokenizer.pad_token_id).astype("int64")
        enc["attention_mask"] = attn_mask

    # Build decoder_input_ids:
    # T5 uses pad_token_id as the decoder start token, so seed the decoder
    # with a single pad id per batch row.
    batch_size = enc["input_ids"].shape[0]
    decoder_input_ids = paddle.full(
        shape=[batch_size, 1],
        fill_value=tokenizer.pad_token_id,
        dtype="int64",
    )

    # 3) Load the model; optionally cast to a reduced-precision dtype.
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    if dtype == "float16":
        model = model.astype(paddle.float16)
    elif dtype not in (None, "float32"):
        # Generalized from the original float16-only branch: accept any
        # other paddle dtype string (e.g. "bfloat16") as well.
        model = model.astype(dtype)
    model.eval()

    # 4) Assemble the inputs fed to the model.
    inputs = {
        "input_ids": enc["input_ids"],
        "attention_mask": enc["attention_mask"],
        "decoder_input_ids": decoder_input_ids,
    }
    return model, inputs
def get_albert_model_and_inputs(model_name, text, dtype):
    """Load the pretrained ALBERT backbone (AlbertModel) and build its inputs.

    Args:
        model_name: PaddleNLP built-in name, e.g. "albert-base-v2",
            "albert-xxlarge-v1".
        dtype: "float32" or "float16".
        text: a single string or a list of strings to encode.

    Returns:
        (model, inputs_dict)
    """
    import paddle
    from paddlenlp.transformers import AlbertModel, AlbertTokenizer

    # 1) Model.
    # FIX: the original built ``AlbertModel(AlbertConfig.from_pretrained(...))``,
    # which instantiates the architecture with RANDOM weights, while the
    # docstring promises a loaded backbone and every sibling getter in this
    # module (T5, XLNet) uses from_pretrained. Load the pretrained weights.
    # NOTE(review): if only the graph structure is needed (no weights),
    # switch back to ``AlbertModel(AlbertConfig.from_pretrained(model_name))``.
    model = AlbertModel.from_pretrained(model_name)
    if dtype == "float16":
        model = model.astype(paddle.float16)
    model.eval()

    # 2) Tokenizer.
    tokenizer = AlbertTokenizer.from_pretrained(model_name)

    # Fall back to unk_token when no pad token is defined (ALBERT has no
    # eos_token, so do not set pad = eos).
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.unk_token

    # 3) Encode (single sample or batch).
    enc = tokenizer(
        text,
        return_tensors="pd",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Ensure attention_mask exists (0 at padding positions, 1 elsewhere).
    if "attention_mask" not in enc:
        input_ids = enc["input_ids"]
        enc["attention_mask"] = (input_ids != tokenizer.pad_token_id).astype("int64")

    return model, enc
mode 100644 index 00000000..d13518e6 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v1/model.py @@ -0,0 +1,1900 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x21xi64) <- (1x21xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x21xi64) <- (1x1x21xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x21xf32) <- (1x1x1x21xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [21] + + # pd_op.slice: (1x21xi64) <- (1x512xi64, 1xi64, 1xi64) + 
slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 30000x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x21x128xf32, 1x21xf32, 1x21xf32) <- (1x21x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x21x128xf32, 1x21x128xui8) <- (1x21x128xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_0 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x128xf32, 128x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_20, False, False) + del dropout_0, parameter_20 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, 
parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 21, 12, 64] + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.125"), 
paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_3, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_5 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_7, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_8 = paddle._C_ops.add(add_2, dropout_4) + del add_2, dropout_4 + 
+ # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_0 = paddle._C_ops.gelu(add_9, False) + del add_9 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_8 = paddle._C_ops.matmul(gelu_0, parameter_4, False, False) + del gelu_0 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_3) + del matmul_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_11 = paddle._C_ops.add(add_10, layer_norm_3) + del add_10, layer_norm_3 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_10, 
parameter_13) + del matmul_10 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_4) + del add_12 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_4) + del add_14 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_3 = paddle._C_ops.scale(matmul_12, full_3, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_15 = paddle._C_ops.add(scale_3, scale_1) + del scale_3 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else 
(out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_13 = paddle._C_ops.matmul(dropout_6, transpose_6, False, False) + del dropout_6, transpose_6 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_16, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_17 = paddle._C_ops.add(layer_norm_6, dropout_8) + del dropout_8, layer_norm_6 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_1 = paddle._C_ops.gelu(add_18, False) + del add_18 + + # pd_op.matmul: (1x21x768xf32) <- 
(1x21x3072xf32, 3072x768xf32) + matmul_16 = paddle._C_ops.matmul(gelu_1, parameter_4, False, False) + del gelu_1 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_20 = paddle._C_ops.add(add_19, layer_norm_9) + del add_19, layer_norm_9 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_4) + del add_21 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_4) + del add_22 + + # pd_op.transpose: 
(1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_4 = paddle._C_ops.scale(matmul_20, full_3, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_24 = paddle._C_ops.add(scale_4, scale_1) + del scale_4 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_21 = paddle._C_ops.matmul(dropout_10, transpose_10, False, False) + del dropout_10, transpose_10 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_25 = 
paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_25, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_25 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_26 = paddle._C_ops.add(layer_norm_12, dropout_12) + del dropout_12, layer_norm_12 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_2 = paddle._C_ops.gelu(add_27, False) + del add_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_24 = paddle._C_ops.matmul(gelu_2, parameter_4, False, False) + del gelu_2 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_29 = paddle._C_ops.add(add_28, layer_norm_15) + del add_28, layer_norm_15 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + 
) + del add_29 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_4) + del add_30 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_4) + del add_31 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_4) + del add_32 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_28, full_3, float("0"), True) + 
del matmul_28 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_33 = paddle._C_ops.add(scale_5, scale_1) + del scale_5 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_29 = paddle._C_ops.matmul(dropout_14, transpose_14, False, False) + del dropout_14, transpose_14 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del transpose_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del reshape_15 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_34, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_34 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_35 = paddle._C_ops.add(layer_norm_18, dropout_16) + del dropout_16, layer_norm_18 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, 
parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_3 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_32 = paddle._C_ops.matmul(gelu_3, parameter_4, False, False) + del gelu_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_38 = paddle._C_ops.add(add_37, layer_norm_21) + del add_37, layer_norm_21 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_15) + del matmul_33 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_13) + del matmul_34 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_12, False, False) + + # 
pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_35, parameter_11) + del matmul_35 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_39, full_int_array_4) + del add_39 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_40, full_int_array_4) + del add_40 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_41, full_int_array_4) + del add_41 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_36 = paddle._C_ops.matmul(transpose_16, transpose_17, False, True) + del transpose_16, transpose_17 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_6 = paddle._C_ops.scale(matmul_36, full_3, float("0"), True) + del matmul_36 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_42 = paddle._C_ops.add(scale_6, scale_1) + del scale_6 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_4 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_37 = paddle._C_ops.matmul(dropout_18, 
transpose_18, False, False) + del dropout_18, transpose_18 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_5) + del transpose_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_10, False, False) + del reshape_19 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_43 = paddle._C_ops.add(matmul_38, parameter_9) + del matmul_38 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_43, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_44 = paddle._C_ops.add(layer_norm_24, dropout_20) + del dropout_20, layer_norm_24 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_44, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_44 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_27, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_5) + del matmul_39 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_45, False) + del add_45 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_40 = paddle._C_ops.matmul(gelu_4, parameter_4, False, False) + del gelu_4 + + # pd_op.add: 
(1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_46 = paddle._C_ops.add(matmul_40, parameter_3) + del matmul_40 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_47 = paddle._C_ops.add(add_46, layer_norm_27) + del add_46, layer_norm_27 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_47, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_47 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_15) + del matmul_41 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_13) + del matmul_42 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_30, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_50 = paddle._C_ops.add(matmul_43, parameter_11) + del matmul_43 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_48, full_int_array_4) + del add_48 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_49, full_int_array_4) + del add_49 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # 
pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_50, full_int_array_4) + del add_50 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_44 = paddle._C_ops.matmul(transpose_20, transpose_21, False, True) + del transpose_20, transpose_21 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_7 = paddle._C_ops.scale(matmul_44, full_3, float("0"), True) + del matmul_44 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_51 = paddle._C_ops.add(scale_7, scale_1) + del scale_7 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_5 = paddle._C_ops.softmax(add_51, -1) + del add_51 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_45 = paddle._C_ops.matmul(dropout_22, transpose_22, False, False) + del dropout_22, transpose_22 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_5) + del transpose_23 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_10, False, False) + del reshape_23 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_52 = paddle._C_ops.add(matmul_46, parameter_9) + del matmul_46 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, 
None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_52, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_52 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_53 = paddle._C_ops.add(layer_norm_30, dropout_24) + del dropout_24, layer_norm_30 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_53 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_5) + del matmul_47 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_54, False) + del add_54 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_48 = paddle._C_ops.matmul(gelu_5, parameter_4, False, False) + del gelu_5 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_48, parameter_3) + del matmul_48 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_56 = paddle._C_ops.add(add_55, layer_norm_33) + del add_55, layer_norm_33 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_56 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_49 = 
paddle._C_ops.matmul(layer_norm_36, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_15) + del matmul_49 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_36, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_13) + del matmul_50 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(layer_norm_36, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_59 = paddle._C_ops.add(matmul_51, parameter_11) + del matmul_51 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_57, full_int_array_4) + del add_57 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_58, full_int_array_4) + del add_58 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_59, full_int_array_4) + del add_59 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_52 = paddle._C_ops.matmul(transpose_24, transpose_25, False, True) + del transpose_24, transpose_25 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_8 = paddle._C_ops.scale(matmul_52, full_3, float("0"), True) + del matmul_52 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_60 = 
paddle._C_ops.add(scale_8, scale_1) + del scale_8 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_6 = paddle._C_ops.softmax(add_60, -1) + del add_60 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_53 = paddle._C_ops.matmul(dropout_26, transpose_26, False, False) + del dropout_26, transpose_26 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_5) + del transpose_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_10, False, False) + del reshape_27 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_54, parameter_9) + del matmul_54 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_61, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_61 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_62 = paddle._C_ops.add(layer_norm_36, dropout_28) + del dropout_28, layer_norm_36 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, 
tuple)) else (out, None, None), + ) + del add_62 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_39, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_5) + del matmul_55 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_63, False) + del add_63 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(gelu_6, parameter_4, False, False) + del gelu_6 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_56, parameter_3) + del matmul_56 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_65 = paddle._C_ops.add(add_64, layer_norm_39) + del add_64, layer_norm_39 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_65 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_42, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_15) + del matmul_57 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_42, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_13) + del matmul_58 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(layer_norm_42, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_68 = paddle._C_ops.add(matmul_59, 
parameter_11) + del matmul_59 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_66, full_int_array_4) + del add_66 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_67, full_int_array_4) + del add_67 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_68, full_int_array_4) + del add_68 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_60 = paddle._C_ops.matmul(transpose_28, transpose_29, False, True) + del transpose_28, transpose_29 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_9 = paddle._C_ops.scale(matmul_60, full_3, float("0"), True) + del matmul_60 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_69 = paddle._C_ops.add(scale_9, scale_1) + del scale_9 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_7 = paddle._C_ops.softmax(add_69, -1) + del add_69 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_61 = paddle._C_ops.matmul(dropout_30, transpose_30, False, False) + del dropout_30, transpose_30 + + # pd_op.transpose: (1x21x12x64xf32) <- 
(1x12x21x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_61, [0, 2, 1, 3]) + del matmul_61 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_5) + del transpose_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(reshape_31, parameter_10, False, False) + del reshape_31 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_70 = paddle._C_ops.add(matmul_62, parameter_9) + del matmul_62 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_70, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_70 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_71 = paddle._C_ops.add(layer_norm_42, dropout_32) + del dropout_32, layer_norm_42 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_71, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_71 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_63 = paddle._C_ops.matmul(layer_norm_45, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_5) + del matmul_63 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_72, False) + del add_72 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_64 = paddle._C_ops.matmul(gelu_7, parameter_4, False, False) + del gelu_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_73 = paddle._C_ops.add(matmul_64, parameter_3) + del 
matmul_64 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_74 = paddle._C_ops.add(add_73, layer_norm_45) + del add_73, layer_norm_45 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_74, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_74 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_48, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_15) + del matmul_65 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_48, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_13) + del matmul_66 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_48, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_77 = paddle._C_ops.add(matmul_67, parameter_11) + del matmul_67 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_75, full_int_array_4) + del add_75 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_76, full_int_array_4) + del add_76 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_77, 
full_int_array_4) + del add_77 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_68 = paddle._C_ops.matmul(transpose_32, transpose_33, False, True) + del transpose_32, transpose_33 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_10 = paddle._C_ops.scale(matmul_68, full_3, float("0"), True) + del matmul_68 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_78 = paddle._C_ops.add(scale_10, scale_1) + del scale_10 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_8 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_69 = paddle._C_ops.matmul(dropout_34, transpose_34, False, False) + del dropout_34, transpose_34 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_5) + del transpose_35 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_70 = paddle._C_ops.matmul(reshape_35, parameter_10, False, False) + del reshape_35 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_79 = paddle._C_ops.add(matmul_70, parameter_9) + del matmul_70 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_79, 
None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_79 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_80 = paddle._C_ops.add(layer_norm_48, dropout_36) + del dropout_36, layer_norm_48 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_80 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_71 = paddle._C_ops.matmul(layer_norm_51, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_5) + del matmul_71 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_72 = paddle._C_ops.matmul(gelu_8, parameter_4, False, False) + del gelu_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_82 = paddle._C_ops.add(matmul_72, parameter_3) + del matmul_72 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_83 = paddle._C_ops.add(add_82, layer_norm_51) + del add_82, layer_norm_51 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_83 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_54, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + 
add_84 = paddle._C_ops.add(matmul_73, parameter_15) + del matmul_73 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_54, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_13) + del matmul_74 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_54, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_75, parameter_11) + del matmul_75 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_84, full_int_array_4) + del add_84 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_85, full_int_array_4) + del add_85 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_86, full_int_array_4) + del add_86 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_76 = paddle._C_ops.matmul(transpose_36, transpose_37, False, True) + del transpose_36, transpose_37 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_11 = paddle._C_ops.scale(matmul_76, full_3, float("0"), True) + del matmul_76 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_87 = paddle._C_ops.add(scale_11, scale_1) + del scale_11 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_9 = 
paddle._C_ops.softmax(add_87, -1) + del add_87 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_77 = paddle._C_ops.matmul(dropout_38, transpose_38, False, False) + del dropout_38, transpose_38 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_77, [0, 2, 1, 3]) + del matmul_77 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_5) + del transpose_39 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(reshape_39, parameter_10, False, False) + del reshape_39 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_88 = paddle._C_ops.add(matmul_78, parameter_9) + del matmul_78 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_88, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_88 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_89 = paddle._C_ops.add(layer_norm_54, dropout_40) + del dropout_40, layer_norm_54 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_89, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_89 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + 
matmul_79 = paddle._C_ops.matmul(layer_norm_57, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_5) + del matmul_79 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_90, False) + del add_90 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_80 = paddle._C_ops.matmul(gelu_9, parameter_4, False, False) + del gelu_9 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_91 = paddle._C_ops.add(matmul_80, parameter_3) + del matmul_80 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_92 = paddle._C_ops.add(add_91, layer_norm_57) + del add_91, layer_norm_57 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_92, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_92 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_60, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_15) + del matmul_81 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_60, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_13) + del matmul_82 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_60, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_95 = paddle._C_ops.add(matmul_83, parameter_11) + del matmul_83 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_40 = 
paddle._C_ops.reshape(add_93, full_int_array_4) + del add_93 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_94, full_int_array_4) + del add_94 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_95, full_int_array_4) + del add_95 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_84 = paddle._C_ops.matmul(transpose_40, transpose_41, False, True) + del transpose_40, transpose_41 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_12 = paddle._C_ops.scale(matmul_84, full_3, float("0"), True) + del matmul_84 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_96 = paddle._C_ops.add(scale_12, scale_1) + del scale_12 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_10 = paddle._C_ops.softmax(add_96, -1) + del add_96 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_85 = paddle._C_ops.matmul(dropout_42, transpose_42, False, False) + del dropout_42, transpose_42 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_85, [0, 2, 1, 3]) + del matmul_85 + + # 
pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_5) + del transpose_43 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_86 = paddle._C_ops.matmul(reshape_43, parameter_10, False, False) + del reshape_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_86, parameter_9) + del matmul_86 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_97, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_97 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_98 = paddle._C_ops.add(layer_norm_60, dropout_44) + del dropout_44, layer_norm_60 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_98 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_87 = paddle._C_ops.matmul(layer_norm_63, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_5) + del matmul_87 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_99, False) + del add_99 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_88 = paddle._C_ops.matmul(gelu_10, parameter_4, False, False) + del gelu_10 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_100 = paddle._C_ops.add(matmul_88, parameter_3) + del matmul_88 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_101 = 
paddle._C_ops.add(add_100, layer_norm_63) + del add_100, layer_norm_63 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_101 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_66, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_15) + del matmul_89, parameter_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_66, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_13) + del matmul_90, parameter_13 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_66, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_104 = paddle._C_ops.add(matmul_91, parameter_11) + del matmul_91, parameter_11 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_102, full_int_array_4) + del add_102 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_103, full_int_array_4) + del add_103 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_46 = 
paddle._C_ops.reshape(add_104, full_int_array_4) + del add_104, full_int_array_4 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_92 = paddle._C_ops.matmul(transpose_44, transpose_45, False, True) + del transpose_44, transpose_45 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_13 = paddle._C_ops.scale(matmul_92, full_3, float("0"), True) + del full_3, matmul_92 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_105 = paddle._C_ops.add(scale_13, scale_1) + del scale_1, scale_13 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_11 = paddle._C_ops.softmax(add_105, -1) + del add_105 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_93 = paddle._C_ops.matmul(dropout_46, transpose_46, False, False) + del dropout_46, transpose_46 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_93, [0, 2, 1, 3]) + del matmul_93 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_5) + del full_int_array_5, transpose_47 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_47, parameter_10, False, False) + del parameter_10, reshape_47 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_106 = paddle._C_ops.add(matmul_94, parameter_9) + del matmul_94, parameter_9 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) 
<- (1x21x768xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_106, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_106, full_2 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_107 = paddle._C_ops.add(layer_norm_66, dropout_48) + del dropout_48, layer_norm_66 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_107, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_107, parameter_7, parameter_8 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_69, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_5) + del matmul_95, parameter_5 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_108, False) + del add_108 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(gelu_11, parameter_4, False, False) + del gelu_11, parameter_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_109 = paddle._C_ops.add(matmul_96, parameter_3) + del matmul_96, parameter_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_110 = paddle._C_ops.add(add_109, layer_norm_69) + del add_109, layer_norm_69 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + 
) + del add_110, parameter_17, parameter_18 + + # pd_op.slice: (1x768xf32) <- (1x21x768xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x768xf32) <- (1x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x768xf32) <- (1x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_97, parameter_1) + del matmul_97, parameter_1 + + # pd_op.tanh: (1x768xf32) <- (1x768xf32) + tanh_0 = paddle._C_ops.tanh(add_111) + del add_111, layer_norm_72 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-base-v1/weight_meta.py b/paddle_samples/PaddleNLP/albert-base-v1/weight_meta.py new file mode 100644 index 00000000..f9edf499 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v1/weight_meta.py @@ -0,0 +1,235 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950393") + max_val = float("0.0949818") + mean = float("2.1163e-05") + std = float("0.0199833") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0978883") + max_val = float("0.0982025") + mean = float("-9.18199e-06") + std = float("0.0199946") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = 
"parameter_6" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101063") + max_val = float("0.100538") + mean = float("1.45909e-05") + std = float("0.0200083") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0869559") + max_val = float("0.10256") + mean = float("-1.37491e-05") + std = float("0.0200072") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.095139") + max_val = float("0.0959922") + mean = float("3.0233e-05") + std = float("0.0199653") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0921497") + max_val = float("0.0860873") + mean = float("3.58198e-05") + std = float("0.019985") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0895908") + max_val = float("0.08705") + mean = float("7.12779e-06") + std = float("0.0199925") + data = None + + +class 
Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 768] + dtype = "float32" + min_val = float("-0.0824699") + max_val = float("0.0883701") + mean = float("5.90966e-05") + std = float("0.0199163") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0438373") + max_val = float("0.0567006") + mean = float("0.00175291") + std = float("0.0182297") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0952002") + max_val = float("0.0822103") + mean = float("0.000103211") + std = float("0.0200516") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [30000, 128] + dtype = "float32" + min_val = float("-0.0970852") + max_val = float("0.110504") + mean = float("5.30125e-06") + std = float("0.0200003") + data = None diff --git a/paddle_samples/PaddleNLP/albert-base-v2/graph_net.json b/paddle_samples/PaddleNLP/albert-base-v2/graph_net.json new file mode 100644 index 00000000..ae04e463 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v2/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": 
"paddle", + "model_name": "albert-base-v2", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-base-v2/input_meta.py b/paddle_samples/PaddleNLP/albert-base-v2/input_meta.py new file mode 100644 index 00000000..b4583463 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v2/input_meta.py @@ -0,0 +1,41 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 21] + dtype = "int64" + data = [ + 2, + 10975, + 15, + 51, + 204, + 25, + 1909, + 9, + 31, + 589, + 2477, + 88, + 370, + 816, + 2761, + 17, + 66, + 2607, + 18, + 9, + 3, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 21] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 21] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-base-v2/model.py b/paddle_samples/PaddleNLP/albert-base-v2/model.py new file mode 100644 index 00000000..ec262473 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v2/model.py @@ -0,0 +1,2003 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x21xi64) <- (1x21xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # 
pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x21xi64) <- (1x1x21xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x21xf32) <- (1x1x1x21xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x21xf32) <- (1x1x1x21xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [21] + + # pd_op.slice: (1x21xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 30000x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + 
del add_0, embedding_2 + + # pd_op.layer_norm: (1x21x128xf32, 1x21xf32, 1x21xf32) <- (1x21x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x128xf32, 128x768xf32) + matmul_0 = paddle._C_ops.matmul(layer_norm_0, parameter_20, False, False) + del layer_norm_0, parameter_20 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 21, 12, 64] + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # 
pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_2, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_5 = paddle._C_ops.matmul(softmax_0, transpose_2, False, False) + del softmax_0, transpose_2 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_8 = 
paddle._C_ops.add(add_2, add_7) + del add_2, add_7 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.5"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_3 = paddle._C_ops.scale(add_9, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_0 = paddle._C_ops.pow(add_9, float("3")) + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.044715"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_4 = paddle._C_ops.scale(pow_0, full_4, float("0"), True) + del pow_0 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_10 = paddle._C_ops.add(add_9, scale_4) + del add_9, scale_4 + + # pd_op.full: (1xf32) <- () + full_5 = paddle._C_ops.full( + [1], float("0.797885"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_5 = paddle._C_ops.scale(add_10, full_5, float("0"), True) + del add_10 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_1 = paddle._C_ops.tanh(scale_5) + del scale_5 + + # pd_op.full: (1xf32) <- () + full_6 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_6 = 
paddle._C_ops.scale(tanh_1, full_6, float("1"), True) + del tanh_1 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_0 = paddle._C_ops.multiply(scale_3, scale_6) + del scale_3, scale_6 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_8 = paddle._C_ops.matmul(multiply_0, parameter_4, False, False) + del multiply_0 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_11 = paddle._C_ops.add(matmul_8, parameter_3) + del matmul_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_12 = paddle._C_ops.add(add_11, layer_norm_3) + del add_11, layer_norm_3 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_12, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_12 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_13) + del matmul_10 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + 
transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_14, full_int_array_4) + del add_14 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_15, full_int_array_4) + del add_15 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_7 = paddle._C_ops.scale(matmul_12, full_2, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_16 = paddle._C_ops.add(scale_7, scale_1) + del scale_7 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_1 = paddle._C_ops.softmax(add_16, -1) + del add_16 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_13 = paddle._C_ops.matmul(softmax_1, transpose_6, False, False) + del softmax_1, transpose_6 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.add: (1x21x768xf32) <- 
(1x21x768xf32, 1x21x768xf32) + add_18 = paddle._C_ops.add(layer_norm_6, add_17) + del add_17, layer_norm_6 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_8 = paddle._C_ops.scale(add_19, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_1 = paddle._C_ops.pow(add_19, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_9 = paddle._C_ops.scale(pow_1, full_4, float("0"), True) + del pow_1 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_20 = paddle._C_ops.add(add_19, scale_9) + del add_19, scale_9 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_10 = paddle._C_ops.scale(add_20, full_5, float("0"), True) + del add_20 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_2 = paddle._C_ops.tanh(scale_10) + del scale_10 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_11 = paddle._C_ops.scale(tanh_2, full_6, float("1"), True) + del tanh_2 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_1 = paddle._C_ops.multiply(scale_8, scale_11) + del scale_11, scale_8 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_16 = paddle._C_ops.matmul(multiply_1, parameter_4, False, False) + del multiply_1 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_21 = 
paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_22 = paddle._C_ops.add(add_21, layer_norm_9) + del add_21, layer_norm_9 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_22, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_22 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_23 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_25 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_24, full_int_array_4) + del add_24 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_10 
= paddle._C_ops.reshape(add_25, full_int_array_4) + del add_25 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_12 = paddle._C_ops.scale(matmul_20, full_2, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_26 = paddle._C_ops.add(scale_12, scale_1) + del scale_12 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_2 = paddle._C_ops.softmax(add_26, -1) + del add_26 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_21 = paddle._C_ops.matmul(softmax_2, transpose_10, False, False) + del softmax_2, transpose_10 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_27 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_28 = paddle._C_ops.add(layer_norm_12, add_27) + del add_27, layer_norm_12 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, 
None, None), + ) + del add_28 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_29 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_13 = paddle._C_ops.scale(add_29, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_2 = paddle._C_ops.pow(add_29, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_14 = paddle._C_ops.scale(pow_2, full_4, float("0"), True) + del pow_2 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_30 = paddle._C_ops.add(add_29, scale_14) + del add_29, scale_14 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_15 = paddle._C_ops.scale(add_30, full_5, float("0"), True) + del add_30 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_3 = paddle._C_ops.tanh(scale_15) + del scale_15 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_16 = paddle._C_ops.scale(tanh_3, full_6, float("1"), True) + del tanh_3 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_2 = paddle._C_ops.multiply(scale_13, scale_16) + del scale_13, scale_16 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_24 = paddle._C_ops.matmul(multiply_2, parameter_4, False, False) + del multiply_2 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_32 = paddle._C_ops.add(add_31, layer_norm_15) + del add_31, layer_norm_15 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_32, 
parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_32 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_34 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_33, full_int_array_4) + del add_33 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_34, full_int_array_4) + del add_34 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_35, full_int_array_4) + del add_35 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # 
pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_17 = paddle._C_ops.scale(matmul_28, full_2, float("0"), True) + del matmul_28 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_36 = paddle._C_ops.add(scale_17, scale_1) + del scale_17 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_3 = paddle._C_ops.softmax(add_36, -1) + del add_36 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_29 = paddle._C_ops.matmul(softmax_3, transpose_14, False, False) + del softmax_3, transpose_14 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del transpose_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del reshape_15 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_38 = paddle._C_ops.add(layer_norm_18, add_37) + del add_37, layer_norm_18 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_39 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_18 = 
paddle._C_ops.scale(add_39, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_3 = paddle._C_ops.pow(add_39, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_19 = paddle._C_ops.scale(pow_3, full_4, float("0"), True) + del pow_3 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_40 = paddle._C_ops.add(add_39, scale_19) + del add_39, scale_19 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_20 = paddle._C_ops.scale(add_40, full_5, float("0"), True) + del add_40 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_4 = paddle._C_ops.tanh(scale_20) + del scale_20 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_21 = paddle._C_ops.scale(tanh_4, full_6, float("1"), True) + del tanh_4 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_3 = paddle._C_ops.multiply(scale_18, scale_21) + del scale_18, scale_21 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_32 = paddle._C_ops.matmul(multiply_3, parameter_4, False, False) + del multiply_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_42 = paddle._C_ops.add(add_41, layer_norm_21) + del add_41, layer_norm_21 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_42 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_43 = paddle._C_ops.add(matmul_33, 
parameter_15) + del matmul_33 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_13) + del matmul_34 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_45 = paddle._C_ops.add(matmul_35, parameter_11) + del matmul_35 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_43, full_int_array_4) + del add_43 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_44, full_int_array_4) + del add_44 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_45, full_int_array_4) + del add_45 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_36 = paddle._C_ops.matmul(transpose_16, transpose_17, False, True) + del transpose_16, transpose_17 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_22 = paddle._C_ops.scale(matmul_36, full_2, float("0"), True) + del matmul_36 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_46 = paddle._C_ops.add(scale_22, scale_1) + del scale_22 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_4 = paddle._C_ops.softmax(add_46, -1) + del 
add_46 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_37 = paddle._C_ops.matmul(softmax_4, transpose_18, False, False) + del softmax_4, transpose_18 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_5) + del transpose_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_10, False, False) + del reshape_19 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_38, parameter_9) + del matmul_38 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_48 = paddle._C_ops.add(layer_norm_24, add_47) + del add_47, layer_norm_24 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_48 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_27, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_49 = paddle._C_ops.add(matmul_39, parameter_5) + del matmul_39 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_23 = paddle._C_ops.scale(add_49, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_4 = paddle._C_ops.pow(add_49, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_24 = paddle._C_ops.scale(pow_4, full_4, float("0"), True) + del pow_4 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_50 = paddle._C_ops.add(add_49, 
scale_24) + del add_49, scale_24 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_25 = paddle._C_ops.scale(add_50, full_5, float("0"), True) + del add_50 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_5 = paddle._C_ops.tanh(scale_25) + del scale_25 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_26 = paddle._C_ops.scale(tanh_5, full_6, float("1"), True) + del tanh_5 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_4 = paddle._C_ops.multiply(scale_23, scale_26) + del scale_23, scale_26 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_40 = paddle._C_ops.matmul(multiply_4, parameter_4, False, False) + del multiply_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_51 = paddle._C_ops.add(matmul_40, parameter_3) + del matmul_40 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_52 = paddle._C_ops.add(add_51, layer_norm_27) + del add_51, layer_norm_27 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_52, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_52 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_53 = paddle._C_ops.add(matmul_41, parameter_15) + del matmul_41 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_54 = paddle._C_ops.add(matmul_42, parameter_13) + del matmul_42 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_43 = 
paddle._C_ops.matmul(layer_norm_30, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_43, parameter_11) + del matmul_43 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_53, full_int_array_4) + del add_53 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_54, full_int_array_4) + del add_54 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_55, full_int_array_4) + del add_55 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_44 = paddle._C_ops.matmul(transpose_20, transpose_21, False, True) + del transpose_20, transpose_21 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_27 = paddle._C_ops.scale(matmul_44, full_2, float("0"), True) + del matmul_44 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_56 = paddle._C_ops.add(scale_27, scale_1) + del scale_27 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_5 = paddle._C_ops.softmax(add_56, -1) + del add_56 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_45 = paddle._C_ops.matmul(softmax_5, transpose_22, False, False) + del softmax_5, transpose_22 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 
3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_5) + del transpose_23 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_10, False, False) + del reshape_23 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_46, parameter_9) + del matmul_46 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_58 = paddle._C_ops.add(layer_norm_30, add_57) + del add_57, layer_norm_30 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_58, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_58 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_59 = paddle._C_ops.add(matmul_47, parameter_5) + del matmul_47 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_28 = paddle._C_ops.scale(add_59, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_5 = paddle._C_ops.pow(add_59, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_29 = paddle._C_ops.scale(pow_5, full_4, float("0"), True) + del pow_5 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_60 = paddle._C_ops.add(add_59, scale_29) + del add_59, scale_29 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_30 = paddle._C_ops.scale(add_60, full_5, float("0"), True) + del add_60 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_6 = paddle._C_ops.tanh(scale_30) + del scale_30 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_31 = paddle._C_ops.scale(tanh_6, 
full_6, float("1"), True) + del tanh_6 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_5 = paddle._C_ops.multiply(scale_28, scale_31) + del scale_28, scale_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_48 = paddle._C_ops.matmul(multiply_5, parameter_4, False, False) + del multiply_5 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_48, parameter_3) + del matmul_48 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_62 = paddle._C_ops.add(add_61, layer_norm_33) + del add_61, layer_norm_33 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_62 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_36, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_63 = paddle._C_ops.add(matmul_49, parameter_15) + del matmul_49 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_36, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_50, parameter_13) + del matmul_50 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(layer_norm_36, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_65 = paddle._C_ops.add(matmul_51, parameter_11) + del matmul_51 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_63, full_int_array_4) + del add_63 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + 
transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_64, full_int_array_4) + del add_64 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_65, full_int_array_4) + del add_65 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_52 = paddle._C_ops.matmul(transpose_24, transpose_25, False, True) + del transpose_24, transpose_25 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_32 = paddle._C_ops.scale(matmul_52, full_2, float("0"), True) + del matmul_52 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_66 = paddle._C_ops.add(scale_32, scale_1) + del scale_32 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_6 = paddle._C_ops.softmax(add_66, -1) + del add_66 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_53 = paddle._C_ops.matmul(softmax_6, transpose_26, False, False) + del softmax_6, transpose_26 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_5) + del transpose_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_10, False, False) + del reshape_27 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_54, parameter_9) + del matmul_54 + + # 
pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_68 = paddle._C_ops.add(layer_norm_36, add_67) + del add_67, layer_norm_36 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_68, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_68 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_39, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_69 = paddle._C_ops.add(matmul_55, parameter_5) + del matmul_55 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_33 = paddle._C_ops.scale(add_69, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_6 = paddle._C_ops.pow(add_69, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_34 = paddle._C_ops.scale(pow_6, full_4, float("0"), True) + del pow_6 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_70 = paddle._C_ops.add(add_69, scale_34) + del add_69, scale_34 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_35 = paddle._C_ops.scale(add_70, full_5, float("0"), True) + del add_70 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_7 = paddle._C_ops.tanh(scale_35) + del scale_35 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_36 = paddle._C_ops.scale(tanh_7, full_6, float("1"), True) + del tanh_7 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_6 = paddle._C_ops.multiply(scale_33, scale_36) + del scale_33, scale_36 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(multiply_6, parameter_4, False, False) + del multiply_6 + + # pd_op.add: (1x21x768xf32) 
<- (1x21x768xf32, 768xf32) + add_71 = paddle._C_ops.add(matmul_56, parameter_3) + del matmul_56 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_72 = paddle._C_ops.add(add_71, layer_norm_39) + del add_71, layer_norm_39 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_72, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_72 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_42, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_73 = paddle._C_ops.add(matmul_57, parameter_15) + del matmul_57 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_42, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_74 = paddle._C_ops.add(matmul_58, parameter_13) + del matmul_58 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(layer_norm_42, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_59, parameter_11) + del matmul_59 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_73, full_int_array_4) + del add_73 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_74, full_int_array_4) + del add_74 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: 
(1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_75, full_int_array_4) + del add_75 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_60 = paddle._C_ops.matmul(transpose_28, transpose_29, False, True) + del transpose_28, transpose_29 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_37 = paddle._C_ops.scale(matmul_60, full_2, float("0"), True) + del matmul_60 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_76 = paddle._C_ops.add(scale_37, scale_1) + del scale_37 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_7 = paddle._C_ops.softmax(add_76, -1) + del add_76 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_61 = paddle._C_ops.matmul(softmax_7, transpose_30, False, False) + del softmax_7, transpose_30 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_61, [0, 2, 1, 3]) + del matmul_61 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_5) + del transpose_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(reshape_31, parameter_10, False, False) + del reshape_31 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_77 = paddle._C_ops.add(matmul_62, parameter_9) + del matmul_62 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_78 = paddle._C_ops.add(layer_norm_42, add_77) + del add_77, layer_norm_42 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_78, parameter_8, parameter_7, float("1e-12"), 2 + ), + 
lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_78 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_63 = paddle._C_ops.matmul(layer_norm_45, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_79 = paddle._C_ops.add(matmul_63, parameter_5) + del matmul_63 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_38 = paddle._C_ops.scale(add_79, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_7 = paddle._C_ops.pow(add_79, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_39 = paddle._C_ops.scale(pow_7, full_4, float("0"), True) + del pow_7 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_80 = paddle._C_ops.add(add_79, scale_39) + del add_79, scale_39 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_40 = paddle._C_ops.scale(add_80, full_5, float("0"), True) + del add_80 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_8 = paddle._C_ops.tanh(scale_40) + del scale_40 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_41 = paddle._C_ops.scale(tanh_8, full_6, float("1"), True) + del tanh_8 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_7 = paddle._C_ops.multiply(scale_38, scale_41) + del scale_38, scale_41 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_64 = paddle._C_ops.matmul(multiply_7, parameter_4, False, False) + del multiply_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_81 = paddle._C_ops.add(matmul_64, parameter_3) + del matmul_64 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_82 = paddle._C_ops.add(add_81, layer_norm_45) + del add_81, layer_norm_45 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda 
x, f: f(x))( + paddle._C_ops.layer_norm( + add_82, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_82 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_48, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_65, parameter_15) + del matmul_65 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_48, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_66, parameter_13) + del matmul_66 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_48, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_67, parameter_11) + del matmul_67 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_83, full_int_array_4) + del add_83 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_84, full_int_array_4) + del add_84 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_85, full_int_array_4) + del add_85 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_68 = paddle._C_ops.matmul(transpose_32, transpose_33, 
False, True) + del transpose_32, transpose_33 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_42 = paddle._C_ops.scale(matmul_68, full_2, float("0"), True) + del matmul_68 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_86 = paddle._C_ops.add(scale_42, scale_1) + del scale_42 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_8 = paddle._C_ops.softmax(add_86, -1) + del add_86 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_69 = paddle._C_ops.matmul(softmax_8, transpose_34, False, False) + del softmax_8, transpose_34 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_5) + del transpose_35 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_70 = paddle._C_ops.matmul(reshape_35, parameter_10, False, False) + del reshape_35 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_70, parameter_9) + del matmul_70 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_88 = paddle._C_ops.add(layer_norm_48, add_87) + del add_87, layer_norm_48 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_88 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_71 = paddle._C_ops.matmul(layer_norm_51, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_89 = paddle._C_ops.add(matmul_71, parameter_5) + del matmul_71 + + # pd_op.scale: (1x21x3072xf32) 
<- (1x21x3072xf32, 1xf32) + scale_43 = paddle._C_ops.scale(add_89, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_8 = paddle._C_ops.pow(add_89, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_44 = paddle._C_ops.scale(pow_8, full_4, float("0"), True) + del pow_8 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_90 = paddle._C_ops.add(add_89, scale_44) + del add_89, scale_44 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_45 = paddle._C_ops.scale(add_90, full_5, float("0"), True) + del add_90 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_9 = paddle._C_ops.tanh(scale_45) + del scale_45 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_46 = paddle._C_ops.scale(tanh_9, full_6, float("1"), True) + del tanh_9 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_8 = paddle._C_ops.multiply(scale_43, scale_46) + del scale_43, scale_46 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_72 = paddle._C_ops.matmul(multiply_8, parameter_4, False, False) + del multiply_8 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_91 = paddle._C_ops.add(matmul_72, parameter_3) + del matmul_72 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_92 = paddle._C_ops.add(add_91, layer_norm_51) + del add_91, layer_norm_51 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_92, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_92 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_54, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + 
add_93 = paddle._C_ops.add(matmul_73, parameter_15) + del matmul_73 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_54, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_74, parameter_13) + del matmul_74 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_54, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_95 = paddle._C_ops.add(matmul_75, parameter_11) + del matmul_75 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_93, full_int_array_4) + del add_93 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_94, full_int_array_4) + del add_94 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_95, full_int_array_4) + del add_95 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_76 = paddle._C_ops.matmul(transpose_36, transpose_37, False, True) + del transpose_36, transpose_37 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_47 = paddle._C_ops.scale(matmul_76, full_2, float("0"), True) + del matmul_76 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_96 = paddle._C_ops.add(scale_47, scale_1) + del scale_47 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_9 = 
paddle._C_ops.softmax(add_96, -1) + del add_96 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_77 = paddle._C_ops.matmul(softmax_9, transpose_38, False, False) + del softmax_9, transpose_38 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_77, [0, 2, 1, 3]) + del matmul_77 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_5) + del transpose_39 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(reshape_39, parameter_10, False, False) + del reshape_39 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_78, parameter_9) + del matmul_78 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_98 = paddle._C_ops.add(layer_norm_54, add_97) + del add_97, layer_norm_54 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_98 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_57, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_99 = paddle._C_ops.add(matmul_79, parameter_5) + del matmul_79 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_48 = paddle._C_ops.scale(add_99, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_9 = paddle._C_ops.pow(add_99, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_49 = paddle._C_ops.scale(pow_9, full_4, float("0"), True) + del pow_9 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 
1x21x3072xf32) + add_100 = paddle._C_ops.add(add_99, scale_49) + del add_99, scale_49 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_50 = paddle._C_ops.scale(add_100, full_5, float("0"), True) + del add_100 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_10 = paddle._C_ops.tanh(scale_50) + del scale_50 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_51 = paddle._C_ops.scale(tanh_10, full_6, float("1"), True) + del tanh_10 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_9 = paddle._C_ops.multiply(scale_48, scale_51) + del scale_48, scale_51 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_80 = paddle._C_ops.matmul(multiply_9, parameter_4, False, False) + del multiply_9 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_101 = paddle._C_ops.add(matmul_80, parameter_3) + del matmul_80 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_102 = paddle._C_ops.add(add_101, layer_norm_57) + del add_101, layer_norm_57 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_102, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_102 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_60, parameter_16, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_81, parameter_15) + del matmul_81 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_60, parameter_14, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_104 = paddle._C_ops.add(matmul_82, parameter_13) + del matmul_82 + + # pd_op.matmul: 
(1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_60, parameter_12, False, False) + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_83, parameter_11) + del matmul_83 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_103, full_int_array_4) + del add_103 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_104, full_int_array_4) + del add_104 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_105, full_int_array_4) + del add_105 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_84 = paddle._C_ops.matmul(transpose_40, transpose_41, False, True) + del transpose_40, transpose_41 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_52 = paddle._C_ops.scale(matmul_84, full_2, float("0"), True) + del matmul_84 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_106 = paddle._C_ops.add(scale_52, scale_1) + del scale_52 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_10 = paddle._C_ops.softmax(add_106, -1) + del add_106 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_85 = paddle._C_ops.matmul(softmax_10, transpose_42, False, False) + del softmax_10, transpose_42 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_85, [0, 2, 1, 3]) 
+ del matmul_85 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_5) + del transpose_43 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_86 = paddle._C_ops.matmul(reshape_43, parameter_10, False, False) + del reshape_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_107 = paddle._C_ops.add(matmul_86, parameter_9) + del matmul_86 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_108 = paddle._C_ops.add(layer_norm_60, add_107) + del add_107, layer_norm_60 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_108, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_108 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_87 = paddle._C_ops.matmul(layer_norm_63, parameter_6, False, False) + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_109 = paddle._C_ops.add(matmul_87, parameter_5) + del matmul_87 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_53 = paddle._C_ops.scale(add_109, full_3, float("0"), True) + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_10 = paddle._C_ops.pow(add_109, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_54 = paddle._C_ops.scale(pow_10, full_4, float("0"), True) + del pow_10 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_110 = paddle._C_ops.add(add_109, scale_54) + del add_109, scale_54 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_55 = paddle._C_ops.scale(add_110, full_5, float("0"), True) + del add_110 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_11 = paddle._C_ops.tanh(scale_55) + del scale_55 + + # 
pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_56 = paddle._C_ops.scale(tanh_11, full_6, float("1"), True) + del tanh_11 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_10 = paddle._C_ops.multiply(scale_53, scale_56) + del scale_53, scale_56 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_88 = paddle._C_ops.matmul(multiply_10, parameter_4, False, False) + del multiply_10 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_88, parameter_3) + del matmul_88 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_112 = paddle._C_ops.add(add_111, layer_norm_63) + del add_111, layer_norm_63 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_112 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_66, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_113 = paddle._C_ops.add(matmul_89, parameter_15) + del matmul_89, parameter_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_66, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_114 = paddle._C_ops.add(matmul_90, parameter_13) + del matmul_90, parameter_13 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_66, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_115 = paddle._C_ops.add(matmul_91, parameter_11) + del matmul_91, parameter_11 
+ + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_113, full_int_array_4) + del add_113 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_114, full_int_array_4) + del add_114 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_115, full_int_array_4) + del add_115, full_int_array_4 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_92 = paddle._C_ops.matmul(transpose_44, transpose_45, False, True) + del transpose_44, transpose_45 + + # pd_op.scale: (1x12x21x21xf32) <- (1x12x21x21xf32, 1xf32) + scale_57 = paddle._C_ops.scale(matmul_92, full_2, float("0"), True) + del full_2, matmul_92 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_116 = paddle._C_ops.add(scale_57, scale_1) + del scale_1, scale_57 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_11 = paddle._C_ops.softmax(add_116, -1) + del add_116 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_93 = paddle._C_ops.matmul(softmax_11, transpose_46, False, False) + del softmax_11, transpose_46 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_93, [0, 2, 1, 3]) + del matmul_93 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_5) + del full_int_array_5, transpose_47 + + # pd_op.matmul: (1x21x768xf32) 
<- (1x21x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_47, parameter_10, False, False) + del parameter_10, reshape_47 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_117 = paddle._C_ops.add(matmul_94, parameter_9) + del matmul_94, parameter_9 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_118 = paddle._C_ops.add(layer_norm_66, add_117) + del add_117, layer_norm_66 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_118, parameter_7, parameter_8 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_69, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_119 = paddle._C_ops.add(matmul_95, parameter_5) + del matmul_95, parameter_5 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_58 = paddle._C_ops.scale(add_119, full_3, float("0"), True) + del full_3 + + # pd_op.pow: (1x21x3072xf32) <- (1x21x3072xf32) + pow_11 = paddle._C_ops.pow(add_119, float("3")) + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_59 = paddle._C_ops.scale(pow_11, full_4, float("0"), True) + del full_4, pow_11 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + add_120 = paddle._C_ops.add(add_119, scale_59) + del add_119, scale_59 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_60 = paddle._C_ops.scale(add_120, full_5, float("0"), True) + del add_120, full_5 + + # pd_op.tanh: (1x21x3072xf32) <- (1x21x3072xf32) + tanh_12 = paddle._C_ops.tanh(scale_60) + del scale_60 + + # pd_op.scale: (1x21x3072xf32) <- (1x21x3072xf32, 1xf32) + scale_61 = 
paddle._C_ops.scale(tanh_12, full_6, float("1"), True) + del full_6, tanh_12 + + # pd_op.multiply: (1x21x3072xf32) <- (1x21x3072xf32, 1x21x3072xf32) + multiply_11 = paddle._C_ops.multiply(scale_58, scale_61) + del scale_58, scale_61 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(multiply_11, parameter_4, False, False) + del multiply_11, parameter_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_121 = paddle._C_ops.add(matmul_96, parameter_3) + del matmul_96, parameter_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_122 = paddle._C_ops.add(add_121, layer_norm_69) + del add_121, layer_norm_69 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_122, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_122, parameter_17, parameter_18 + + # pd_op.slice: (1x768xf32) <- (1x21x768xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x768xf32) <- (1x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x768xf32) <- (1x768xf32, 768xf32) + add_123 = paddle._C_ops.add(matmul_97, parameter_1) + del matmul_97, parameter_1 + + # pd_op.tanh: (1x768xf32) <- (1x768xf32) + tanh_0 = paddle._C_ops.tanh(add_123) + del add_123, layer_norm_72 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-base-v2/weight_meta.py b/paddle_samples/PaddleNLP/albert-base-v2/weight_meta.py new file mode 100644 index 00000000..7588cbb6 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-base-v2/weight_meta.py @@ -0,0 +1,235 @@ +class 
Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0887999") + max_val = float("0.107858") + mean = float("9.86864e-06") + std = float("0.0199999") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.101961") + max_val = float("0.107306") + mean = float("-1.01831e-05") + std = float("0.0199888") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0937552") + max_val = float("0.100479") + mean = float("4.65614e-06") + std = float("0.0200006") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0929592") + max_val = float("0.0998105") + mean = float("4.01795e-05") + std = float("0.0200188") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768] + dtype = 
"float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0910254") + max_val = float("0.0940073") + mean = float("1.1987e-05") + std = float("0.02001") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0958641") + max_val = float("0.0952735") + mean = float("-4.36463e-05") + std = float("0.0199898") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0925532") + max_val = float("0.0965498") + mean = float("-4.51693e-05") + std = float("0.0200245") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 768] + dtype = "float32" + min_val = float("-0.0868425") + max_val = float("0.0925445") + mean = float("-0.000119993") + std = float("0.0200474") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None 
+ + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0554865") + max_val = float("0.0556627") + mean = float("-0.00351806") + std = float("0.0195563") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0802345") + max_val = float("0.0806108") + mean = float("4.65631e-05") + std = float("0.0200016") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [30000, 128] + dtype = "float32" + min_val = float("-0.103231") + max_val = float("0.10054") + mean = float("-1.42952e-05") + std = float("0.0199984") + data = None diff --git a/paddle_samples/PaddleNLP/albert-chinese-base/graph_net.json b/paddle_samples/PaddleNLP/albert-chinese-base/graph_net.json new file mode 100644 index 00000000..16cf1535 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-base/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "albert-chinese-base", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-chinese-base/input_meta.py b/paddle_samples/PaddleNLP/albert-chinese-base/input_meta.py new file mode 100644 index 00000000..3708564f --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-base/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3614, 6816, 886, 4500, 4636, 2428, 7607, 3444, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-chinese-base/model.py 
b/paddle_samples/PaddleNLP/albert-chinese-base/model.py new file mode 100644 index 00000000..51da3109 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-base/model.py @@ -0,0 +1,1670 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x11xi64) <- (1x11xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x11xi64) <- (1x1x11xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x11xf32) <- (1x1x1x11xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + 
full_int_array_3 = [11] + + # pd_op.slice: (1x11xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 21128x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x11x128xf32, 1x11xf32, 1x11xf32) <- (1x11x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x128xf32, 128x768xf32) + matmul_0 = paddle._C_ops.matmul(layer_norm_0, parameter_20, False, False) + del layer_norm_0, parameter_20 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 
768x768xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 11, 12, 64] + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_2, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x11x11xf32) <- 
(1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_5 = paddle._C_ops.matmul(softmax_0, transpose_2, False, False) + del softmax_0, transpose_2 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_8 = paddle._C_ops.add(add_2, add_7) + del add_2, add_7 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_0 = paddle._C_ops.relu(add_9) + del add_9 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_8 = paddle._C_ops.matmul(relu_0, parameter_4, False, False) + del relu_0 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_3) + del 
matmul_8 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_11 = paddle._C_ops.add(add_10, layer_norm_3) + del add_10, layer_norm_3 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_13) + del matmul_10 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_4) + del add_12 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_4) + del 
add_14 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_3 = paddle._C_ops.scale(matmul_12, full_2, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_15 = paddle._C_ops.add(scale_3, scale_1) + del scale_3 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_13 = paddle._C_ops.matmul(softmax_1, transpose_6, False, False) + del softmax_1, transpose_6 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_17 = paddle._C_ops.add(layer_norm_6, add_16) + del add_16, layer_norm_6 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17 + + # pd_op.matmul: (1x11x3072xf32) <- 
(1x11x768xf32, 768x3072xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_1 = paddle._C_ops.relu(add_18) + del add_18 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_16 = paddle._C_ops.matmul(relu_1, parameter_4, False, False) + del relu_1 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_20 = paddle._C_ops.add(add_19, layer_norm_9) + del add_19, layer_norm_9 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + 
reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_4) + del add_21 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_4) + del add_22 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_4 = paddle._C_ops.scale(matmul_20, full_2, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_24 = paddle._C_ops.add(scale_4, scale_1) + del scale_4 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_21 = paddle._C_ops.matmul(softmax_2, transpose_10, False, False) + del softmax_2, transpose_10 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: 
(1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_26 = paddle._C_ops.add(layer_norm_12, add_25) + del add_25, layer_norm_12 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_2 = paddle._C_ops.relu(add_27) + del add_27 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_24 = paddle._C_ops.matmul(relu_2, parameter_4, False, False) + del relu_2 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_29 = paddle._C_ops.add(add_28, layer_norm_15) + del add_28, layer_norm_15 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_29 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_25, 
parameter_15) + del matmul_25 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_4) + del add_30 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_4) + del add_31 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_4) + del add_32 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_28, full_2, float("0"), True) + del matmul_28 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_33 = paddle._C_ops.add(scale_5, scale_1) + del scale_5 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del 
add_33 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_29 = paddle._C_ops.matmul(softmax_3, transpose_14, False, False) + del softmax_3, transpose_14 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del transpose_15 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del reshape_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_35 = paddle._C_ops.add(layer_norm_18, add_34) + del add_34, layer_norm_18 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_3 = paddle._C_ops.relu(add_36) + del add_36 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_32 = paddle._C_ops.matmul(relu_3, parameter_4, False, False) + del relu_3 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_38 = 
paddle._C_ops.add(add_37, layer_norm_21) + del add_37, layer_norm_21 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_15) + del matmul_33 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_13) + del matmul_34 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_35, parameter_11) + del matmul_35 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_39, full_int_array_4) + del add_39 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_40, full_int_array_4) + del add_40 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_41, full_int_array_4) + del add_41 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) 
+ transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_36 = paddle._C_ops.matmul(transpose_16, transpose_17, False, True) + del transpose_16, transpose_17 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_6 = paddle._C_ops.scale(matmul_36, full_2, float("0"), True) + del matmul_36 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_42 = paddle._C_ops.add(scale_6, scale_1) + del scale_6 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_37 = paddle._C_ops.matmul(softmax_4, transpose_18, False, False) + del softmax_4, transpose_18 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_5) + del transpose_19 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_10, False, False) + del reshape_19 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_43 = paddle._C_ops.add(matmul_38, parameter_9) + del matmul_38 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_44 = paddle._C_ops.add(layer_norm_24, add_43) + del add_43, layer_norm_24 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_44, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_44 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_39 = 
paddle._C_ops.matmul(layer_norm_27, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_5) + del matmul_39 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_4 = paddle._C_ops.relu(add_45) + del add_45 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_40 = paddle._C_ops.matmul(relu_4, parameter_4, False, False) + del relu_4 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_46 = paddle._C_ops.add(matmul_40, parameter_3) + del matmul_40 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_47 = paddle._C_ops.add(add_46, layer_norm_27) + del add_46, layer_norm_27 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_47, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_47 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_15) + del matmul_41 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_13) + del matmul_42 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_30, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_50 = paddle._C_ops.add(matmul_43, parameter_11) + del matmul_43 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_48, 
full_int_array_4) + del add_48 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_49, full_int_array_4) + del add_49 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_50, full_int_array_4) + del add_50 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_44 = paddle._C_ops.matmul(transpose_20, transpose_21, False, True) + del transpose_20, transpose_21 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_7 = paddle._C_ops.scale(matmul_44, full_2, float("0"), True) + del matmul_44 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_51 = paddle._C_ops.add(scale_7, scale_1) + del scale_7 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_51, -1) + del add_51 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_45 = paddle._C_ops.matmul(softmax_5, transpose_22, False, False) + del softmax_5, transpose_22 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_5) + del transpose_23 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_10, False, False) + del reshape_23 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 
768xf32) + add_52 = paddle._C_ops.add(matmul_46, parameter_9) + del matmul_46 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_53 = paddle._C_ops.add(layer_norm_30, add_52) + del add_52, layer_norm_30 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_53 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_5) + del matmul_47 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_5 = paddle._C_ops.relu(add_54) + del add_54 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_48 = paddle._C_ops.matmul(relu_5, parameter_4, False, False) + del relu_5 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_48, parameter_3) + del matmul_48 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_56 = paddle._C_ops.add(add_55, layer_norm_33) + del add_55, layer_norm_33 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_56 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_36, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_15) + del matmul_49 + + # 
pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_36, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_13) + del matmul_50 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(layer_norm_36, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_59 = paddle._C_ops.add(matmul_51, parameter_11) + del matmul_51 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_57, full_int_array_4) + del add_57 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_58, full_int_array_4) + del add_58 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_59, full_int_array_4) + del add_59 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_52 = paddle._C_ops.matmul(transpose_24, transpose_25, False, True) + del transpose_24, transpose_25 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_8 = paddle._C_ops.scale(matmul_52, full_2, float("0"), True) + del matmul_52 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_60 = paddle._C_ops.add(scale_8, scale_1) + del scale_8 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_6 = paddle._C_ops.softmax(add_60, -1) + del add_60 + + # pd_op.matmul: 
(1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_53 = paddle._C_ops.matmul(softmax_6, transpose_26, False, False) + del softmax_6, transpose_26 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_5) + del transpose_27 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_10, False, False) + del reshape_27 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_54, parameter_9) + del matmul_54 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_62 = paddle._C_ops.add(layer_norm_36, add_61) + del add_61, layer_norm_36 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_62 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_39, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_5) + del matmul_55 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_6 = paddle._C_ops.relu(add_63) + del add_63 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(relu_6, parameter_4, False, False) + del relu_6 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_56, parameter_3) + del matmul_56 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_65 = paddle._C_ops.add(add_64, layer_norm_39) 
+ del add_64, layer_norm_39 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_65 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_42, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_15) + del matmul_57 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_42, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_13) + del matmul_58 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(layer_norm_42, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_68 = paddle._C_ops.add(matmul_59, parameter_11) + del matmul_59 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_66, full_int_array_4) + del add_66 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_67, full_int_array_4) + del add_67 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_68, full_int_array_4) + del add_68 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_30 = 
paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_60 = paddle._C_ops.matmul(transpose_28, transpose_29, False, True) + del transpose_28, transpose_29 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_9 = paddle._C_ops.scale(matmul_60, full_2, float("0"), True) + del matmul_60 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_69 = paddle._C_ops.add(scale_9, scale_1) + del scale_9 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_7 = paddle._C_ops.softmax(add_69, -1) + del add_69 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_61 = paddle._C_ops.matmul(softmax_7, transpose_30, False, False) + del softmax_7, transpose_30 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_61, [0, 2, 1, 3]) + del matmul_61 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_5) + del transpose_31 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(reshape_31, parameter_10, False, False) + del reshape_31 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_70 = paddle._C_ops.add(matmul_62, parameter_9) + del matmul_62 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_71 = paddle._C_ops.add(layer_norm_42, add_70) + del add_70, layer_norm_42 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_71, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_71 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_63 = 
paddle._C_ops.matmul(layer_norm_45, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_5) + del matmul_63 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_7 = paddle._C_ops.relu(add_72) + del add_72 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_64 = paddle._C_ops.matmul(relu_7, parameter_4, False, False) + del relu_7 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_73 = paddle._C_ops.add(matmul_64, parameter_3) + del matmul_64 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_74 = paddle._C_ops.add(add_73, layer_norm_45) + del add_73, layer_norm_45 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_74, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_74 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_48, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_15) + del matmul_65 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_48, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_13) + del matmul_66 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_48, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_77 = paddle._C_ops.add(matmul_67, parameter_11) + del matmul_67 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_75, 
full_int_array_4) + del add_75 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_76, full_int_array_4) + del add_76 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_77, full_int_array_4) + del add_77 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_68 = paddle._C_ops.matmul(transpose_32, transpose_33, False, True) + del transpose_32, transpose_33 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_10 = paddle._C_ops.scale(matmul_68, full_2, float("0"), True) + del matmul_68 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_78 = paddle._C_ops.add(scale_10, scale_1) + del scale_10 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_8 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_69 = paddle._C_ops.matmul(softmax_8, transpose_34, False, False) + del softmax_8, transpose_34 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_5) + del transpose_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_70 = paddle._C_ops.matmul(reshape_35, parameter_10, False, False) + del reshape_35 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 
768xf32) + add_79 = paddle._C_ops.add(matmul_70, parameter_9) + del matmul_70 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_80 = paddle._C_ops.add(layer_norm_48, add_79) + del add_79, layer_norm_48 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_80 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_71 = paddle._C_ops.matmul(layer_norm_51, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_5) + del matmul_71 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_8 = paddle._C_ops.relu(add_81) + del add_81 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_72 = paddle._C_ops.matmul(relu_8, parameter_4, False, False) + del relu_8 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_82 = paddle._C_ops.add(matmul_72, parameter_3) + del matmul_72 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_83 = paddle._C_ops.add(add_82, layer_norm_51) + del add_82, layer_norm_51 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_83 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_54, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_73, parameter_15) + del matmul_73 + + # 
pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_54, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_13) + del matmul_74 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_54, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_75, parameter_11) + del matmul_75 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_84, full_int_array_4) + del add_84 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_85, full_int_array_4) + del add_85 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_86, full_int_array_4) + del add_86 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_76 = paddle._C_ops.matmul(transpose_36, transpose_37, False, True) + del transpose_36, transpose_37 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_11 = paddle._C_ops.scale(matmul_76, full_2, float("0"), True) + del matmul_76 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_87 = paddle._C_ops.add(scale_11, scale_1) + del scale_11 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_9 = paddle._C_ops.softmax(add_87, -1) + del add_87 + + # pd_op.matmul: 
(1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_77 = paddle._C_ops.matmul(softmax_9, transpose_38, False, False) + del softmax_9, transpose_38 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_77, [0, 2, 1, 3]) + del matmul_77 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_5) + del transpose_39 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(reshape_39, parameter_10, False, False) + del reshape_39 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_88 = paddle._C_ops.add(matmul_78, parameter_9) + del matmul_78 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_89 = paddle._C_ops.add(layer_norm_54, add_88) + del add_88, layer_norm_54 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_89, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_89 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_57, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_5) + del matmul_79 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_9 = paddle._C_ops.relu(add_90) + del add_90 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_80 = paddle._C_ops.matmul(relu_9, parameter_4, False, False) + del relu_9 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_91 = paddle._C_ops.add(matmul_80, parameter_3) + del matmul_80 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_92 = paddle._C_ops.add(add_91, layer_norm_57) 
+ del add_91, layer_norm_57 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_92, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_92 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_60, parameter_16, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_15) + del matmul_81 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_60, parameter_14, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_13) + del matmul_82 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_60, parameter_12, False, False) + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_95 = paddle._C_ops.add(matmul_83, parameter_11) + del matmul_83 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_93, full_int_array_4) + del add_93 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_94, full_int_array_4) + del add_94 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_95, full_int_array_4) + del add_95 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_42 = 
paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_84 = paddle._C_ops.matmul(transpose_40, transpose_41, False, True) + del transpose_40, transpose_41 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_12 = paddle._C_ops.scale(matmul_84, full_2, float("0"), True) + del matmul_84 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_96 = paddle._C_ops.add(scale_12, scale_1) + del scale_12 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_10 = paddle._C_ops.softmax(add_96, -1) + del add_96 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_85 = paddle._C_ops.matmul(softmax_10, transpose_42, False, False) + del softmax_10, transpose_42 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_85, [0, 2, 1, 3]) + del matmul_85 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_5) + del transpose_43 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_86 = paddle._C_ops.matmul(reshape_43, parameter_10, False, False) + del reshape_43 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_86, parameter_9) + del matmul_86 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_98 = paddle._C_ops.add(layer_norm_60, add_97) + del add_97, layer_norm_60 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_98 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_87 = 
paddle._C_ops.matmul(layer_norm_63, parameter_6, False, False) + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_5) + del matmul_87 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_10 = paddle._C_ops.relu(add_99) + del add_99 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_88 = paddle._C_ops.matmul(relu_10, parameter_4, False, False) + del relu_10 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_100 = paddle._C_ops.add(matmul_88, parameter_3) + del matmul_88 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_101 = paddle._C_ops.add(add_100, layer_norm_63) + del add_100, layer_norm_63 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_101 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_66, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_15) + del matmul_89, parameter_15 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_66, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_13) + del matmul_90, parameter_13 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_66, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_104 = paddle._C_ops.add(matmul_91, parameter_11) + del matmul_91, parameter_11 + 
+ # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_102, full_int_array_4) + del add_102 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_103, full_int_array_4) + del add_103 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_104, full_int_array_4) + del add_104, full_int_array_4 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_92 = paddle._C_ops.matmul(transpose_44, transpose_45, False, True) + del transpose_44, transpose_45 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_13 = paddle._C_ops.scale(matmul_92, full_2, float("0"), True) + del full_2, matmul_92 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_105 = paddle._C_ops.add(scale_13, scale_1) + del scale_1, scale_13 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_11 = paddle._C_ops.softmax(add_105, -1) + del add_105 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_93 = paddle._C_ops.matmul(softmax_11, transpose_46, False, False) + del softmax_11, transpose_46 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_93, [0, 2, 1, 3]) + del matmul_93 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_5) + del full_int_array_5, transpose_47 + + # pd_op.matmul: (1x11x768xf32) 
<- (1x11x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_47, parameter_10, False, False) + del parameter_10, reshape_47 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_106 = paddle._C_ops.add(matmul_94, parameter_9) + del matmul_94, parameter_9 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_107 = paddle._C_ops.add(layer_norm_66, add_106) + del add_106, layer_norm_66 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_107, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_107, parameter_7, parameter_8 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_69, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_5) + del matmul_95, parameter_5 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_11 = paddle._C_ops.relu(add_108) + del add_108 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(relu_11, parameter_4, False, False) + del parameter_4, relu_11 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_109 = paddle._C_ops.add(matmul_96, parameter_3) + del matmul_96, parameter_3 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_110 = paddle._C_ops.add(add_109, layer_norm_69) + del add_109, layer_norm_69 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + 
) + del add_110, parameter_17, parameter_18 + + # pd_op.slice: (1x768xf32) <- (1x11x768xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x768xf32) <- (1x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x768xf32) <- (1x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_97, parameter_1) + del matmul_97, parameter_1 + + # pd_op.tanh: (1x768xf32) <- (1x768xf32) + tanh_0 = paddle._C_ops.tanh(add_111) + del add_111, layer_norm_72 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-chinese-base/weight_meta.py b/paddle_samples/PaddleNLP/albert-chinese-base/weight_meta.py new file mode 100644 index 00000000..d9bd49e4 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-base/weight_meta.py @@ -0,0 +1,235 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0892679") + max_val = float("0.0906134") + mean = float("-3.00388e-06") + std = float("0.0199841") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.101152") + max_val = float("0.0999676") + mean = float("1.01817e-05") + std = float("0.019991") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + 
name = "parameter_6" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0942874") + max_val = float("0.0995946") + mean = float("2.76684e-07") + std = float("0.0199973") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0992178") + max_val = float("0.0932706") + mean = float("2.48208e-05") + std = float("0.0200258") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0932884") + max_val = float("0.0917139") + mean = float("1.4562e-05") + std = float("0.0200167") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0930002") + max_val = float("0.0961642") + mean = float("-5.85622e-05") + std = float("0.019987") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916871") + max_val = float("0.089794") + mean = float("-3.99677e-05") + std = float("0.0200198") + data = None + + +class 
Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 768] + dtype = "float32" + min_val = float("-0.0860205") + max_val = float("0.0973591") + mean = float("2.12175e-05") + std = float("0.0200251") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0590662") + max_val = float("0.0505173") + mean = float("-0.000780354") + std = float("0.0203115") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0905854") + max_val = float("0.0949802") + mean = float("1.62754e-05") + std = float("0.019975") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [21128, 128] + dtype = "float32" + min_val = float("-0.0995291") + max_val = float("0.0971798") + mean = float("-5.40105e-06") + std = float("0.0200164") + data = None diff --git a/paddle_samples/PaddleNLP/albert-chinese-small/graph_net.json b/paddle_samples/PaddleNLP/albert-chinese-small/graph_net.json new file mode 100644 index 00000000..aac3cc2f --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-small/graph_net.json @@ -0,0 +1,6 
@@ +{ + "framework": "paddle", + "model_name": "albert-chinese-small", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-chinese-small/input_meta.py b/paddle_samples/PaddleNLP/albert-chinese-small/input_meta.py new file mode 100644 index 00000000..3708564f --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-small/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3614, 6816, 886, 4500, 4636, 2428, 7607, 3444, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-chinese-small/model.py b/paddle_samples/PaddleNLP/albert-chinese-small/model.py new file mode 100644 index 00000000..aac4bc18 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-small/model.py @@ -0,0 +1,914 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x11xi64) <- (1x11xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: 
(1x1x1x11xi64) <- (1x1x11xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x11xf32) <- (1x1x1x11xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [11] + + # pd_op.slice: (1x11xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 21128x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x11x128xf32, 1x11xf32, 1x11xf32) <- 
(1x11x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x128xf32, 128x384xf32) + matmul_0 = paddle._C_ops.matmul(layer_norm_0, parameter_20, False, False) + del layer_norm_0, parameter_20 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 11, 12, 32] + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_1 = 
paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_2, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_5 = paddle._C_ops.matmul(softmax_0, transpose_2, False, False) + del softmax_0, transpose_2 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_8 = paddle._C_ops.add(add_2, add_7) + del add_2, add_7 + + # 
pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_0 = paddle._C_ops.gelu(add_9, False) + del add_9 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_8 = paddle._C_ops.matmul(gelu_0, parameter_4, False, False) + del gelu_0 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_3) + del matmul_8 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_11 = paddle._C_ops.add(add_10, layer_norm_3) + del add_10, layer_norm_3 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_13) 
+ del matmul_10 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_4) + del add_12 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_4) + del add_14 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_3 = paddle._C_ops.scale(matmul_12, full_2, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_15 = paddle._C_ops.add(scale_3, scale_1) + del scale_3 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_13 = paddle._C_ops.matmul(softmax_1, transpose_6, False, False) + del softmax_1, transpose_6 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) 
+ del matmul_13 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_17 = paddle._C_ops.add(layer_norm_6, add_16) + del add_16, layer_norm_6 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_1 = paddle._C_ops.gelu(add_18, False) + del add_18 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_16 = paddle._C_ops.matmul(gelu_1, parameter_4, False, False) + del gelu_1 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_20 = paddle._C_ops.add(add_19, layer_norm_9) + del add_19, layer_norm_9 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_18, parameter_17, float("1e-12"), 2 + ), + 
lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_4) + del add_21 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_4) + del add_22 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + 
scale_4 = paddle._C_ops.scale(matmul_20, full_2, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_24 = paddle._C_ops.add(scale_4, scale_1) + del scale_4 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_21 = paddle._C_ops.matmul(softmax_2, transpose_10, False, False) + del softmax_2, transpose_10 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_26 = paddle._C_ops.add(layer_norm_12, add_25) + del add_25, layer_norm_12 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_2 = paddle._C_ops.gelu(add_27, False) + del add_27 + + # pd_op.matmul: (1x11x384xf32) <- 
(1x11x1536xf32, 1536x384xf32) + matmul_24 = paddle._C_ops.matmul(gelu_2, parameter_4, False, False) + del gelu_2 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_29 = paddle._C_ops.add(add_28, layer_norm_15) + del add_28, layer_norm_15 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_29 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_4) + del add_30 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_4) + del add_31 + + # pd_op.transpose: 
(1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_4) + del add_32 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_28, full_2, float("0"), True) + del matmul_28 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_33 = paddle._C_ops.add(scale_5, scale_1) + del scale_5 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_29 = paddle._C_ops.matmul(softmax_3, transpose_14, False, False) + del softmax_3, transpose_14 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del transpose_15 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del reshape_15 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_35 = paddle._C_ops.add(layer_norm_18, add_34) + del add_34, layer_norm_18 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_21, 
layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_3 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_32 = paddle._C_ops.matmul(gelu_3, parameter_4, False, False) + del gelu_3 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_38 = paddle._C_ops.add(add_37, layer_norm_21) + del add_37, layer_norm_21 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_16, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_15) + del matmul_33 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_14, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_13) + del matmul_34 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) 
+ matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_12, False, False) + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_41 = paddle._C_ops.add(matmul_35, parameter_11) + del matmul_35 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_39, full_int_array_4) + del add_39 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_40, full_int_array_4) + del add_40 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_41, full_int_array_4) + del add_41 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_36 = paddle._C_ops.matmul(transpose_16, transpose_17, False, True) + del transpose_16, transpose_17 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_6 = paddle._C_ops.scale(matmul_36, full_2, float("0"), True) + del matmul_36 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_42 = paddle._C_ops.add(scale_6, scale_1) + del scale_6 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_37 = paddle._C_ops.matmul(softmax_4, transpose_18, False, False) + del softmax_4, transpose_18 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x11x384xf32) <- 
(1x11x12x32xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_5) + del transpose_19 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_10, False, False) + del reshape_19 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_43 = paddle._C_ops.add(matmul_38, parameter_9) + del matmul_38 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_44 = paddle._C_ops.add(layer_norm_24, add_43) + del add_43, layer_norm_24 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_44, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_44 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_27, parameter_6, False, False) + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_5) + del matmul_39 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_4 = paddle._C_ops.gelu(add_45, False) + del add_45 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_40 = paddle._C_ops.matmul(gelu_4, parameter_4, False, False) + del gelu_4 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_46 = paddle._C_ops.add(matmul_40, parameter_3) + del matmul_40 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_47 = paddle._C_ops.add(add_46, layer_norm_27) + del add_46, layer_norm_27 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_47, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, 
tuple)) else (out, None, None), + ) + del add_47 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_15) + del matmul_41, parameter_15 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_13) + del matmul_42, parameter_13 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_30, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_50 = paddle._C_ops.add(matmul_43, parameter_11) + del matmul_43, parameter_11 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_48, full_int_array_4) + del add_48 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_49, full_int_array_4) + del add_49 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x12x32xf32) <- (1x11x384xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_50, full_int_array_4) + del add_50, full_int_array_4 + + # pd_op.transpose: (1x12x11x32xf32) <- (1x11x12x32xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + matmul_44 = paddle._C_ops.matmul(transpose_20, transpose_21, False, True) + del 
transpose_20, transpose_21 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_7 = paddle._C_ops.scale(matmul_44, full_2, float("0"), True) + del full_2, matmul_44 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_51 = paddle._C_ops.add(scale_7, scale_1) + del scale_1, scale_7 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_51, -1) + del add_51 + + # pd_op.matmul: (1x12x11x32xf32) <- (1x12x11x11xf32, 1x12x11x32xf32) + matmul_45 = paddle._C_ops.matmul(softmax_5, transpose_22, False, False) + del softmax_5, transpose_22 + + # pd_op.transpose: (1x11x12x32xf32) <- (1x12x11x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x12x32xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_5) + del full_int_array_5, transpose_23 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_10, False, False) + del parameter_10, reshape_23 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_52 = paddle._C_ops.add(matmul_46, parameter_9) + del matmul_46, parameter_9 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_53 = paddle._C_ops.add(layer_norm_30, add_52) + del add_52, layer_norm_30 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_53, parameter_7, parameter_8 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x11x1536xf32) <- (1x11x1536xf32, 1536xf32) + add_54 = 
paddle._C_ops.add(matmul_47, parameter_5) + del matmul_47, parameter_5 + + # pd_op.gelu: (1x11x1536xf32) <- (1x11x1536xf32) + gelu_5 = paddle._C_ops.gelu(add_54, False) + del add_54 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_48 = paddle._C_ops.matmul(gelu_5, parameter_4, False, False) + del gelu_5, parameter_4 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 384xf32) + add_55 = paddle._C_ops.add(matmul_48, parameter_3) + del matmul_48, parameter_3 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_56 = paddle._C_ops.add(add_55, layer_norm_33) + del add_55, layer_norm_33 + + # pd_op.layer_norm: (1x11x384xf32, 1x11xf32, 1x11xf32) <- (1x11x384xf32, 384xf32, 384xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_56, parameter_17, parameter_18 + + # pd_op.slice: (1x384xf32) <- (1x11x384xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_36, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x384xf32) <- (1x384xf32, 384x384xf32) + matmul_49 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x384xf32) <- (1x384xf32, 384xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_1) + del matmul_49, parameter_1 + + # pd_op.tanh: (1x384xf32) <- (1x384xf32) + tanh_0 = paddle._C_ops.tanh(add_57) + del add_57, layer_norm_36 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-chinese-small/weight_meta.py b/paddle_samples/PaddleNLP/albert-chinese-small/weight_meta.py new file mode 100644 index 00000000..a92a1fce --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-small/weight_meta.py @@ -0,0 +1,237 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 
512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0920974") + max_val = float("0.0898767") + mean = float("2.2093e-05") + std = float("0.0200502") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1536, 384] + dtype = "float32" + min_val = float("-0.0904018") + max_val = float("0.0969833") + mean = float("-3.42421e-05") + std = float("0.0199846") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [1536] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [384, 1536] + dtype = "float32" + min_val = float("-0.101128") + max_val = float("0.0953021") + mean = float("-3.2029e-05") + std = float("0.0200196") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [384] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + std = float("5.96046e-08") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.090305") + max_val = float("0.0853635") + mean = float("-3.53809e-05") + std = float("0.0199774") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [384] + dtype = "float32" + data = None + + +class 
Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0886189") + max_val = float("0.0784958") + mean = float("6.89041e-05") + std = float("0.0199391") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.085532") + max_val = float("0.0893973") + mean = float("1.21036e-05") + std = float("0.0199489") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.082137") + max_val = float("0.0837528") + mean = float("3.60444e-05") + std = float("0.0199884") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [384] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + std = float("5.96046e-08") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [384] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 384] + dtype = "float32" + min_val = float("-0.0793836") + max_val = float("0.0805083") + mean = float("0.000188609") + std = float("0.0199533") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + 
+class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0491545") + max_val = float("0.0631497") + mean = float("0.000771939") + std = float("0.0196944") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0755785") + max_val = float("0.0787668") + mean = float("9.04328e-06") + std = float("0.0199793") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [21128, 128] + dtype = "float32" + min_val = float("-0.0980633") + max_val = float("0.0982526") + mean = float("-7.01566e-06") + std = float("0.0200018") + data = None diff --git a/paddle_samples/PaddleNLP/albert-chinese-tiny/graph_net.json b/paddle_samples/PaddleNLP/albert-chinese-tiny/graph_net.json new file mode 100644 index 00000000..d83669f6 --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-tiny/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "albert-chinese-tiny", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/albert-chinese-tiny/input_meta.py b/paddle_samples/PaddleNLP/albert-chinese-tiny/input_meta.py new file mode 100644 index 00000000..3708564f --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-tiny/input_meta.py @@ -0,0 +1,19 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3614, 6816, 886, 4500, 4636, 2428, 7607, 3444, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/albert-chinese-tiny/model.py 
b/paddle_samples/PaddleNLP/albert-chinese-tiny/model.py new file mode 100644 index 00000000..558af36b --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-tiny/model.py @@ -0,0 +1,662 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + data_0, + data_1, + data_2, + ): + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [1] + + # pd_op.unsqueeze: (1x1x11xi64) <- (1x11xi64, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [2] + + # pd_op.unsqueeze: (1x1x1x11xi64) <- (1x1x11xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(unsqueeze_0, full_int_array_1) + del full_int_array_1, unsqueeze_0 + + # pd_op.cast: (1x1x1x11xf32) <- (1x1x1x11xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_1, paddle.float32) + del unsqueeze_1 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0, full_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x11xf32) <- (1x1x1x11xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del full_1, scale_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [0] + + # pd_op.full_int_array: (1xi64) <- () + 
full_int_array_3 = [11] + + # pd_op.slice: (1x11xi64) <- (1x512xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_0, [1], full_int_array_2, full_int_array_3, [1], [] + ) + del full_int_array_3, parameter_0 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 21128x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_25, 0, False) + del data_0, parameter_25 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 2x128xf32) + embedding_1 = paddle._C_ops.embedding(data_2, parameter_23, -1, False) + del data_2, parameter_23 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.embedding: (1x11x128xf32) <- (1x11xi64, 512x128xf32) + embedding_2 = paddle._C_ops.embedding(slice_0, parameter_24, -1, False) + del parameter_24, slice_0 + + # pd_op.add: (1x11x128xf32) <- (1x11x128xf32, 1x11x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x11x128xf32, 1x11xf32, 1x11xf32) <- (1x11x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_22, parameter_21, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_21, parameter_22 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x128xf32, 128x312xf32) + matmul_0 = paddle._C_ops.matmul(layer_norm_0, parameter_20, False, False) + del layer_norm_0, parameter_20 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_19) + del matmul_0, parameter_19 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_16, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_15) + del matmul_1 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 
312x312xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_14, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_13) + del matmul_2 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_12, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_11) + del matmul_3 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_4 = [1, 11, 12, 26] + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_4) + del add_3 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_4) + del add_4 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_4) + del add_5 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x26xf32, 1x12x11x26xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.196116"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_4, full_2, float("0"), True) + del matmul_4 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_6 = paddle._C_ops.add(scale_2, scale_1) + del scale_2 + + # pd_op.softmax: (1x12x11x11xf32) 
<- (1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.matmul: (1x12x11x26xf32) <- (1x12x11x11xf32, 1x12x11x26xf32) + matmul_5 = paddle._C_ops.matmul(softmax_0, transpose_2, False, False) + del softmax_0, transpose_2 + + # pd_op.transpose: (1x11x12x26xf32) <- (1x12x11x26xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_5 = [0, 0, -1] + + # pd_op.reshape: (1x11x312xf32) <- (1x11x12x26xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_5) + del transpose_3 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_10, False, False) + del reshape_3 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_9) + del matmul_6 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_8 = paddle._C_ops.add(add_2, add_7) + del add_2, add_7 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8 + + # pd_op.matmul: (1x11x1248xf32) <- (1x11x312xf32, 312x1248xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_6, False, False) + + # pd_op.add: (1x11x1248xf32) <- (1x11x1248xf32, 1248xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_5) + del matmul_7 + + # pd_op.gelu: (1x11x1248xf32) <- (1x11x1248xf32) + gelu_0 = paddle._C_ops.gelu(add_9, False) + del add_9 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x1248xf32, 1248x312xf32) + matmul_8 = paddle._C_ops.matmul(gelu_0, parameter_4, False, False) + del gelu_0 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_3) 
+ del matmul_8 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_11 = paddle._C_ops.add(add_10, layer_norm_3) + del add_10, layer_norm_3 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_16, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_15) + del matmul_9 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_14, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_13) + del matmul_10 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_12, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_11) + del matmul_11 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_4) + del add_12 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_4) + del add_13 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_4) + 
del add_14 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x26xf32, 1x12x11x26xf32) + matmul_12 = paddle._C_ops.matmul(transpose_4, transpose_5, False, True) + del transpose_4, transpose_5 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_3 = paddle._C_ops.scale(matmul_12, full_2, float("0"), True) + del matmul_12 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_15 = paddle._C_ops.add(scale_3, scale_1) + del scale_3 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.matmul: (1x12x11x26xf32) <- (1x12x11x11xf32, 1x12x11x26xf32) + matmul_13 = paddle._C_ops.matmul(softmax_1, transpose_6, False, False) + del softmax_1, transpose_6 + + # pd_op.transpose: (1x11x12x26xf32) <- (1x12x11x26xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x11x312xf32) <- (1x11x12x26xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_5) + del transpose_7 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_10, False, False) + del reshape_7 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_9) + del matmul_14 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_17 = paddle._C_ops.add(layer_norm_6, add_16) + del add_16, layer_norm_6 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17 + + # pd_op.matmul: (1x11x1248xf32) <- 
(1x11x312xf32, 312x1248xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_6, False, False) + + # pd_op.add: (1x11x1248xf32) <- (1x11x1248xf32, 1248xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_5) + del matmul_15 + + # pd_op.gelu: (1x11x1248xf32) <- (1x11x1248xf32) + gelu_1 = paddle._C_ops.gelu(add_18, False) + del add_18 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x1248xf32, 1248x312xf32) + matmul_16 = paddle._C_ops.matmul(gelu_1, parameter_4, False, False) + del gelu_1 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_3) + del matmul_16 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_20 = paddle._C_ops.add(add_19, layer_norm_9) + del add_19, layer_norm_9 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_16, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_15) + del matmul_17 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_14, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_13) + del matmul_18 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_12, False, False) + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_11) + del matmul_19 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + 
reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_4) + del add_21 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_4) + del add_22 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_4) + del add_23 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x26xf32, 1x12x11x26xf32) + matmul_20 = paddle._C_ops.matmul(transpose_8, transpose_9, False, True) + del transpose_8, transpose_9 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_4 = paddle._C_ops.scale(matmul_20, full_2, float("0"), True) + del matmul_20 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_24 = paddle._C_ops.add(scale_4, scale_1) + del scale_4 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.matmul: (1x12x11x26xf32) <- (1x12x11x11xf32, 1x12x11x26xf32) + matmul_21 = paddle._C_ops.matmul(softmax_2, transpose_10, False, False) + del softmax_2, transpose_10 + + # pd_op.transpose: (1x11x12x26xf32) <- (1x12x11x26xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x11x312xf32) <- (1x11x12x26xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_5) + del transpose_11 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_10, False, False) + del reshape_11 + + # pd_op.add: 
(1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_9) + del matmul_22 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_26 = paddle._C_ops.add(layer_norm_12, add_25) + del add_25, layer_norm_12 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26 + + # pd_op.matmul: (1x11x1248xf32) <- (1x11x312xf32, 312x1248xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_6, False, False) + + # pd_op.add: (1x11x1248xf32) <- (1x11x1248xf32, 1248xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_5) + del matmul_23 + + # pd_op.gelu: (1x11x1248xf32) <- (1x11x1248xf32) + gelu_2 = paddle._C_ops.gelu(add_27, False) + del add_27 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x1248xf32, 1248x312xf32) + matmul_24 = paddle._C_ops.matmul(gelu_2, parameter_4, False, False) + del gelu_2 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_3) + del matmul_24 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_29 = paddle._C_ops.add(add_28, layer_norm_15) + del add_28, layer_norm_15 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_29 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_16, False, False) + del parameter_16 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_30 = 
paddle._C_ops.add(matmul_25, parameter_15) + del matmul_25, parameter_15 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_14, False, False) + del parameter_14 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_13) + del matmul_26, parameter_13 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_12, False, False) + del parameter_12 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_11) + del matmul_27, parameter_11 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_4) + del add_30 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_4) + del add_31 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x26xf32) <- (1x11x312xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_4) + del add_32, full_int_array_4 + + # pd_op.transpose: (1x12x11x26xf32) <- (1x11x12x26xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x26xf32, 1x12x11x26xf32) + matmul_28 = paddle._C_ops.matmul(transpose_12, transpose_13, False, True) + del transpose_12, transpose_13 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_28, full_2, float("0"), True) + del full_2, matmul_28 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_33 = paddle._C_ops.add(scale_5, 
scale_1) + del scale_1, scale_5 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (1x12x11x26xf32) <- (1x12x11x11xf32, 1x12x11x26xf32) + matmul_29 = paddle._C_ops.matmul(softmax_3, transpose_14, False, False) + del softmax_3, transpose_14 + + # pd_op.transpose: (1x11x12x26xf32) <- (1x12x11x26xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x11x312xf32) <- (1x11x12x26xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_5) + del full_int_array_5, transpose_15 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x312xf32, 312x312xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_10, False, False) + del parameter_10, reshape_15 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_9) + del matmul_30, parameter_9 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_35 = paddle._C_ops.add(layer_norm_18, add_34) + del add_34, layer_norm_18 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_8, parameter_7, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35, parameter_7, parameter_8 + + # pd_op.matmul: (1x11x1248xf32) <- (1x11x312xf32, 312x1248xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_6, False, False) + del parameter_6 + + # pd_op.add: (1x11x1248xf32) <- (1x11x1248xf32, 1248xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_5) + del matmul_31, parameter_5 + + # pd_op.gelu: (1x11x1248xf32) <- (1x11x1248xf32) + gelu_3 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (1x11x312xf32) <- (1x11x1248xf32, 1248x312xf32) + matmul_32 = paddle._C_ops.matmul(gelu_3, 
parameter_4, False, False) + del gelu_3, parameter_4 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 312xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_3) + del matmul_32, parameter_3 + + # pd_op.add: (1x11x312xf32) <- (1x11x312xf32, 1x11x312xf32) + add_38 = paddle._C_ops.add(add_37, layer_norm_21) + del add_37, layer_norm_21 + + # pd_op.layer_norm: (1x11x312xf32, 1x11xf32, 1x11xf32) <- (1x11x312xf32, 312xf32, 312xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_18, parameter_17, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38, parameter_17, parameter_18 + + # pd_op.slice: (1x312xf32) <- (1x11x312xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_24, [1], full_int_array_2, full_int_array_0, [1], [1] + ) + del full_int_array_0, full_int_array_2 + + # pd_op.matmul: (1x312xf32) <- (1x312xf32, 312x312xf32) + matmul_33 = paddle._C_ops.matmul(slice_1, parameter_2, False, False) + del parameter_2, slice_1 + + # pd_op.add: (1x312xf32) <- (1x312xf32, 312xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_1) + del matmul_33, parameter_1 + + # pd_op.tanh: (1x312xf32) <- (1x312xf32) + tanh_0 = paddle._C_ops.tanh(add_39) + del add_39, layer_norm_24 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/albert-chinese-tiny/weight_meta.py b/paddle_samples/PaddleNLP/albert-chinese-tiny/weight_meta.py new file mode 100644 index 00000000..3bac8dae --- /dev/null +++ b/paddle_samples/PaddleNLP/albert-chinese-tiny/weight_meta.py @@ -0,0 +1,235 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1, 512] + dtype = "int64" + min_val = 0 + max_val = 511 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [312, 312] + dtype = "float32" + 
min_val = float("-0.0943927") + max_val = float("0.0805598") + mean = float("-5.04225e-05") + std = float("0.0199894") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1248, 312] + dtype = "float32" + min_val = float("-0.0882163") + max_val = float("0.0910547") + mean = float("-7.22546e-06") + std = float("0.0199967") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [1248] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [312, 1248] + dtype = "float32" + min_val = float("-0.089193") + max_val = float("0.10013") + mean = float("-5.28496e-05") + std = float("0.0199953") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [312] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0864897") + max_val = float("0.0923653") + mean = float("6.8981e-05") + std = float("0.0200065") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0912581") + max_val = float("0.0870574") + mean = float("-4.12729e-05") + std = float("0.0200247") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [312] + dtype = 
"float32" + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0832851") + max_val = float("0.0934653") + mean = float("-2.10013e-05") + std = float("0.0200296") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [312, 312] + dtype = "float32" + min_val = float("-0.0901047") + max_val = float("0.0851487") + mean = float("-2.36235e-06") + std = float("0.020018") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [312] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [312] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [128, 312] + dtype = "float32" + min_val = float("-0.0820946") + max_val = float("0.0974006") + mean = float("-5.19528e-05") + std = float("0.0200838") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.0599234") + max_val = float("0.066722") + mean = float("-0.00042005") + std = float("0.0224804") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 128] + dtype = "float32" + min_val 
= float("-0.0834669") + max_val = float("0.0840402") + mean = float("0.000142117") + std = float("0.0199409") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [21128, 128] + dtype = "float32" + min_val = float("-0.0938932") + max_val = float("0.105185") + mean = float("1.19757e-05") + std = float("0.0199975") + data = None diff --git a/paddle_samples/PaddleNLP/t5-small/graph_net.json b/paddle_samples/PaddleNLP/t5-small/graph_net.json new file mode 100644 index 00000000..6b649b3d --- /dev/null +++ b/paddle_samples/PaddleNLP/t5-small/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "t5-small", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/t5-small/input_meta.py b/paddle_samples/PaddleNLP/t5-small/input_meta.py new file mode 100644 index 00000000..846bab06 --- /dev/null +++ b/paddle_samples/PaddleNLP/t5-small/input_meta.py @@ -0,0 +1,40 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 20] + dtype = "int64" + data = [ + 8774, + 6, + 82, + 564, + 19, + 5762, + 5, + 27, + 183, + 1036, + 81, + 508, + 1612, + 2250, + 11, + 70, + 4648, + 7, + 5, + 1, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 20] + dtype = "int64" + data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [1, 1] + dtype = "int64" + data = [0] diff --git a/paddle_samples/PaddleNLP/t5-small/model.py b/paddle_samples/PaddleNLP/t5-small/model.py new file mode 100644 index 00000000..29476168 --- /dev/null +++ b/paddle_samples/PaddleNLP/t5-small/model.py @@ -0,0 +1,3317 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + 
parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + data_0, + data_1, 
+ data_2, + ): + # pd_op.embedding: (1x20x512xf32) <- (1x20xi64, 32128x512xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_130, -1, False) + del data_0 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x20xi64) <- (1x20xi64, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(data_1, full_int_array_0) + del data_1 + + # pd_op.cast: (1x1x1x20xf32) <- (1x1x1x20xi64) + cast_0 = paddle._C_ops.cast(unsqueeze_0, paddle.float32) + del unsqueeze_0 + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("-1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x20xf32) <- (1x1x1x20xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_0, float("1"), True) + del cast_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x1x20xf32) <- (1x1x1x20xf32, 1xf32) + scale_1 = paddle._C_ops.scale(scale_0, full_1, float("0"), True) + del scale_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_0 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_0 = paddle._C_ops.pow(dropout_0, float("2")) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_0 = paddle._C_ops.mean(pow_0, full_int_array_1, True) + del pow_0 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_2 = paddle._C_ops.scale(mean_0, 
full_3, float("1e-06"), True) + del mean_0 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_0 = paddle._C_ops.rsqrt(scale_2) + del scale_2 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_0 = paddle._C_ops.multiply(dropout_0, rsqrt_0) + del rsqrt_0 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_1 = paddle._C_ops.multiply(parameter_124, multiply_0) + del multiply_0, parameter_124 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_1 = paddle._C_ops.matmul(multiply_1, parameter_129, False, False) + del parameter_129 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [1, -1, 8, 64] + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_1, full_int_array_2) + del matmul_1 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_2 = paddle._C_ops.matmul(multiply_1, parameter_128, False, False) + del parameter_128 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_2, full_int_array_2) + del matmul_2 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_3 = paddle._C_ops.matmul(multiply_1, parameter_127, False, False) + del multiply_1, parameter_127 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_3, full_int_array_2) + del matmul_3 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_4 = paddle._C_ops.matmul(transpose_0, transpose_1, False, 
True) + del transpose_0, transpose_1 + + # pd_op.full: (1xf64) <- () + full_4 = paddle._C_ops.full( + [1], float("0"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_5 = paddle._C_ops.full( + [1], float("20"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf64) <- () + full_6 = paddle._C_ops.full( + [1], float("1"), paddle.float64, paddle.core.CPUPlace() + ) + + # pd_op.arange: (20xi64) <- (1xf64, 1xf64, 1xf64) + arange_0 = paddle.arange(full_4, full_5, full_6, dtype="int64") + del full_5 + + # pd_op.unsqueeze: (20x1xi64) <- (20xi64, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(arange_0, full_int_array_1) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [0] + + # pd_op.unsqueeze: (1x20xi64) <- (20xi64, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(arange_0, full_int_array_3) + del arange_0 + + # pd_op.subtract: (20x20xi64) <- (1x20xi64, 20x1xi64) + subtract_0 = paddle._C_ops.subtract(unsqueeze_2, unsqueeze_1) + del unsqueeze_1, unsqueeze_2 + + # pd_op.full: (xi64) <- () + full_7 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.greater_than: (20x20xb) <- (20x20xi64, xi64) + greater_than_0 = paddle._C_ops.greater_than(subtract_0, full_7) + del full_7 + + # pd_op.cast: (20x20xi64) <- (20x20xb) + cast_1 = paddle._C_ops.cast(greater_than_0, paddle.int64) + del greater_than_0 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("16"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (20x20xi64) <- (20x20xi64, 1xf32) + scale_3 = paddle._C_ops.scale(cast_1, full_8, float("0"), True) + del cast_1 + + # pd_op.scale: (20x20xi64) <- (20x20xi64, 1xf32) + scale_4 = paddle._C_ops.scale(scale_3, full_3, float("0"), True) + del scale_3 + + # pd_op.abs: (20x20xi64) <- (20x20xi64) + abs_0 = paddle._C_ops.abs(subtract_0) + del subtract_0 + + # pd_op.full: (xi64) <- () + full_9 = paddle._C_ops.full( + [], float("8"), 
paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.less_than: (20x20xb) <- (20x20xi64, xi64) + less_than_0 = paddle._C_ops.less_than(abs_0, full_9) + del full_9 + + # pd_op.cast: (20x20xf32) <- (20x20xi64) + cast_2 = paddle._C_ops.cast(abs_0, paddle.float32) + + # pd_op.full: (1xf32) <- () + full_10 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (20x20xf32) <- (20x20xf32, 1xf32) + scale_5 = paddle._C_ops.scale(cast_2, full_10, float("0"), True) + del cast_2, full_10 + + # pd_op.log: (20x20xf32) <- (20x20xf32) + log_0 = paddle._C_ops.log(scale_5) + del scale_5 + + # pd_op.full: (1xf32) <- () + full_11 = paddle._C_ops.full( + [1], float("0.360674"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (20x20xf32) <- (20x20xf32, 1xf32) + scale_6 = paddle._C_ops.scale(log_0, full_11, float("0"), True) + del full_11, log_0 + + # pd_op.full: (1xf32) <- () + full_12 = paddle._C_ops.full( + [1], float("8"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (20x20xf32) <- (20x20xf32, 1xf32) + scale_7 = paddle._C_ops.scale(scale_6, full_12, float("0"), True) + del full_12, scale_6 + + # pd_op.cast: (20x20xi64) <- (20x20xf32) + cast_3 = paddle._C_ops.cast(scale_7, paddle.int64) + del scale_7 + + # pd_op.scale: (20x20xi64) <- (20x20xi64, 1xf32) + scale_8 = paddle._C_ops.scale(cast_3, full_3, float("8"), True) + del cast_3 + + # pd_op.full: (1xf32) <- () + full_13 = paddle._C_ops.full( + [1], float("15"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (20x20xi64) <- (20x20xi64, 1xf32) + full_like_0 = paddle._C_ops.full_like( + scale_8, full_13, paddle.int64, paddle.framework._current_expected_place() + ) + del full_13 + + # pd_op.minimum: (20x20xi64) <- (20x20xi64, 20x20xi64) + minimum_0 = paddle._C_ops.minimum(scale_8, full_like_0) + del full_like_0, scale_8 + + # pd_op.where: (20x20xi64) <- (20x20xb, 20x20xi64, 20x20xi64) + where_0 = 
paddle._C_ops.where(less_than_0, abs_0, minimum_0) + del abs_0, less_than_0, minimum_0 + + # pd_op.add: (20x20xi64) <- (20x20xi64, 20x20xi64) + add_0 = paddle._C_ops.add(scale_4, where_0) + del scale_4, where_0 + + # pd_op.embedding: (20x20x8xf32) <- (20x20xi64, 32x8xf32) + embedding_1 = paddle._C_ops.embedding(add_0, parameter_125, -1, False) + del add_0, parameter_125 + + # pd_op.transpose: (8x20x20xf32) <- (20x20x8xf32) + transpose_3 = paddle._C_ops.transpose(embedding_1, [2, 0, 1]) + del embedding_1 + + # pd_op.unsqueeze: (1x8x20x20xf32) <- (8x20x20xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_3, full_int_array_3) + del transpose_3 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x1x1x20xf32) + add_1 = paddle._C_ops.add(unsqueeze_3, scale_1) + del unsqueeze_3 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_2 = paddle._C_ops.add(matmul_4, add_1) + del matmul_4 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_0 = paddle._C_ops.softmax(add_2, -1) + del add_2 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_5 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_4 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [1, -1, 512] + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_4, full_int_array_4) + del transpose_4 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, 
parameter_126, False, False) + del parameter_126, reshape_3 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_6, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_6 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_3 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_1 = paddle._C_ops.pow(add_3, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_1 = paddle._C_ops.mean(pow_1, full_int_array_1, True) + del pow_1 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_9 = paddle._C_ops.scale(mean_1, full_3, float("1e-06"), True) + del mean_1 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_1 = paddle._C_ops.rsqrt(scale_9) + del scale_9 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_2 = paddle._C_ops.multiply(add_3, rsqrt_1) + del rsqrt_1 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_3 = paddle._C_ops.multiply(parameter_121, multiply_2) + del multiply_2, parameter_121 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_7 = paddle._C_ops.matmul(multiply_3, parameter_123, False, False) + del multiply_3, parameter_123 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_0 = paddle._C_ops.relu(matmul_7) + del matmul_7 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_0, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_0 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_8 = paddle._C_ops.matmul(dropout_6, parameter_122, False, 
False) + del dropout_6, parameter_122 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_8, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_8 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_4 = paddle._C_ops.add(dropout_8, add_3) + del add_3, dropout_8 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_2 = paddle._C_ops.pow(add_4, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_2 = paddle._C_ops.mean(pow_2, full_int_array_1, True) + del pow_2 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_10 = paddle._C_ops.scale(mean_2, full_3, float("1e-06"), True) + del mean_2 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_2 = paddle._C_ops.rsqrt(scale_10) + del scale_10 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_4 = paddle._C_ops.multiply(add_4, rsqrt_2) + del rsqrt_2 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_5 = paddle._C_ops.multiply(parameter_116, multiply_4) + del multiply_4, parameter_116 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_9 = paddle._C_ops.matmul(multiply_5, parameter_120, False, False) + del parameter_120 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(matmul_9, full_int_array_2) + del matmul_9 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_10 = paddle._C_ops.matmul(multiply_5, parameter_119, False, False) + del parameter_119 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(matmul_10, full_int_array_2) + del matmul_10 + + # 
pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_11 = paddle._C_ops.matmul(multiply_5, parameter_118, False, False) + del multiply_5, parameter_118 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(matmul_11, full_int_array_2) + del matmul_11 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_7 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_12 = paddle._C_ops.matmul(transpose_5, transpose_6, False, True) + del transpose_5, transpose_6 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_5 = paddle._C_ops.add(matmul_12, add_1) + del matmul_12 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_1 = paddle._C_ops.softmax(add_5, -1) + del add_5 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_13 = paddle._C_ops.matmul(dropout_10, transpose_7, False, False) + del dropout_10, transpose_7 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_8 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_8, full_int_array_4) + del transpose_8 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_117, False, False) + del parameter_117, reshape_7 + + # pd_op.dropout: (1x20x512xf32, 
1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_14, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_14 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_6 = paddle._C_ops.add(add_4, dropout_12) + del add_4, dropout_12 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_3 = paddle._C_ops.pow(add_6, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_3 = paddle._C_ops.mean(pow_3, full_int_array_1, True) + del pow_3 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_11 = paddle._C_ops.scale(mean_3, full_3, float("1e-06"), True) + del mean_3 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_3 = paddle._C_ops.rsqrt(scale_11) + del scale_11 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_6 = paddle._C_ops.multiply(add_6, rsqrt_3) + del rsqrt_3 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_7 = paddle._C_ops.multiply(parameter_113, multiply_6) + del multiply_6, parameter_113 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_15 = paddle._C_ops.matmul(multiply_7, parameter_115, False, False) + del multiply_7, parameter_115 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_1 = paddle._C_ops.relu(matmul_15) + del matmul_15 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_1, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_1 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_16 = paddle._C_ops.matmul(dropout_14, parameter_114, False, False) + del dropout_14, parameter_114 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- 
(1x20x512xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_16, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_16 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_7 = paddle._C_ops.add(dropout_16, add_6) + del add_6, dropout_16 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_4 = paddle._C_ops.pow(add_7, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_4 = paddle._C_ops.mean(pow_4, full_int_array_1, True) + del pow_4 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_12 = paddle._C_ops.scale(mean_4, full_3, float("1e-06"), True) + del mean_4 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_4 = paddle._C_ops.rsqrt(scale_12) + del scale_12 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_8 = paddle._C_ops.multiply(add_7, rsqrt_4) + del rsqrt_4 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_9 = paddle._C_ops.multiply(parameter_108, multiply_8) + del multiply_8, parameter_108 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_17 = paddle._C_ops.matmul(multiply_9, parameter_112, False, False) + del parameter_112 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_17, full_int_array_2) + del matmul_17 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_18 = paddle._C_ops.matmul(multiply_9, parameter_111, False, False) + del parameter_111 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_18, full_int_array_2) + del matmul_18 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_10 = 
paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_19 = paddle._C_ops.matmul(multiply_9, parameter_110, False, False) + del multiply_9, parameter_110 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_19, full_int_array_2) + del matmul_19 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_11 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_20 = paddle._C_ops.matmul(transpose_9, transpose_10, False, True) + del transpose_10, transpose_9 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_8 = paddle._C_ops.add(matmul_20, add_1) + del matmul_20 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_2 = paddle._C_ops.softmax(add_8, -1) + del add_8 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_21 = paddle._C_ops.matmul(dropout_18, transpose_11, False, False) + del dropout_18, transpose_11 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_12 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_12, full_int_array_4) + del transpose_12 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_109, False, False) + del parameter_109, reshape_11 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_20, 
dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_22, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_22 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_9 = paddle._C_ops.add(add_7, dropout_20) + del add_7, dropout_20 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_5 = paddle._C_ops.pow(add_9, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_5 = paddle._C_ops.mean(pow_5, full_int_array_1, True) + del pow_5 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_13 = paddle._C_ops.scale(mean_5, full_3, float("1e-06"), True) + del mean_5 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_5 = paddle._C_ops.rsqrt(scale_13) + del scale_13 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_10 = paddle._C_ops.multiply(add_9, rsqrt_5) + del rsqrt_5 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_11 = paddle._C_ops.multiply(parameter_105, multiply_10) + del multiply_10, parameter_105 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_23 = paddle._C_ops.matmul(multiply_11, parameter_107, False, False) + del multiply_11, parameter_107 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_2 = paddle._C_ops.relu(matmul_23) + del matmul_23 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_2, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_2 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_24 = paddle._C_ops.matmul(dropout_22, parameter_106, False, False) + del dropout_22, parameter_106 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_24, dropout_25 
= (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_24, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_24 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_10 = paddle._C_ops.add(dropout_24, add_9) + del add_9, dropout_24 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_6 = paddle._C_ops.pow(add_10, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_6 = paddle._C_ops.mean(pow_6, full_int_array_1, True) + del pow_6 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_14 = paddle._C_ops.scale(mean_6, full_3, float("1e-06"), True) + del mean_6 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_6 = paddle._C_ops.rsqrt(scale_14) + del scale_14 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_12 = paddle._C_ops.multiply(add_10, rsqrt_6) + del rsqrt_6 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_13 = paddle._C_ops.multiply(parameter_100, multiply_12) + del multiply_12, parameter_100 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_25 = paddle._C_ops.matmul(multiply_13, parameter_104, False, False) + del parameter_104 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(matmul_25, full_int_array_2) + del matmul_25 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_26 = paddle._C_ops.matmul(multiply_13, parameter_103, False, False) + del parameter_103 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(matmul_26, full_int_array_2) + del matmul_26 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del 
reshape_13 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_27 = paddle._C_ops.matmul(multiply_13, parameter_102, False, False) + del multiply_13, parameter_102 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_27, full_int_array_2) + del matmul_27 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_15 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_28 = paddle._C_ops.matmul(transpose_13, transpose_14, False, True) + del transpose_13, transpose_14 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_11 = paddle._C_ops.add(matmul_28, add_1) + del matmul_28 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_3 = paddle._C_ops.softmax(add_11, -1) + del add_11 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_29 = paddle._C_ops.matmul(dropout_26, transpose_15, False, False) + del dropout_26, transpose_15 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_16 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_16, full_int_array_4) + del transpose_16 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_101, False, False) + del parameter_101, reshape_15 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + 
paddle._C_ops.dropout( + matmul_30, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_30 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_12 = paddle._C_ops.add(add_10, dropout_28) + del add_10, dropout_28 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_7 = paddle._C_ops.pow(add_12, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_7 = paddle._C_ops.mean(pow_7, full_int_array_1, True) + del pow_7 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_15 = paddle._C_ops.scale(mean_7, full_3, float("1e-06"), True) + del mean_7 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_7 = paddle._C_ops.rsqrt(scale_15) + del scale_15 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_14 = paddle._C_ops.multiply(add_12, rsqrt_7) + del rsqrt_7 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_15 = paddle._C_ops.multiply(parameter_97, multiply_14) + del multiply_14, parameter_97 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_31 = paddle._C_ops.matmul(multiply_15, parameter_99, False, False) + del multiply_15, parameter_99 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_3 = paddle._C_ops.relu(matmul_31) + del matmul_31 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_3, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_3 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_32 = paddle._C_ops.matmul(dropout_30, parameter_98, False, False) + del dropout_30, parameter_98 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + 
paddle._C_ops.dropout( + matmul_32, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_32 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_13 = paddle._C_ops.add(dropout_32, add_12) + del add_12, dropout_32 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_8 = paddle._C_ops.pow(add_13, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_8 = paddle._C_ops.mean(pow_8, full_int_array_1, True) + del pow_8 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_16 = paddle._C_ops.scale(mean_8, full_3, float("1e-06"), True) + del mean_8 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_8 = paddle._C_ops.rsqrt(scale_16) + del scale_16 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_16 = paddle._C_ops.multiply(add_13, rsqrt_8) + del rsqrt_8 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_17 = paddle._C_ops.multiply(parameter_92, multiply_16) + del multiply_16, parameter_92 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_33 = paddle._C_ops.matmul(multiply_17, parameter_96, False, False) + del parameter_96 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_33, full_int_array_2) + del matmul_33 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_34 = paddle._C_ops.matmul(multiply_17, parameter_95, False, False) + del parameter_95 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_34, full_int_array_2) + del matmul_34 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # 
pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_35 = paddle._C_ops.matmul(multiply_17, parameter_94, False, False) + del multiply_17, parameter_94 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(matmul_35, full_int_array_2) + del matmul_35 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_19 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_36 = paddle._C_ops.matmul(transpose_17, transpose_18, False, True) + del transpose_17, transpose_18 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_14 = paddle._C_ops.add(matmul_36, add_1) + del matmul_36 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_4 = paddle._C_ops.softmax(add_14, -1) + del add_14 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_37 = paddle._C_ops.matmul(dropout_34, transpose_19, False, False) + del dropout_34, transpose_19 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_20 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_20, full_int_array_4) + del transpose_20 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_93, False, False) + del parameter_93, reshape_19 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_38, 
None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_38 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_15 = paddle._C_ops.add(add_13, dropout_36) + del add_13, dropout_36 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_9 = paddle._C_ops.pow(add_15, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_9 = paddle._C_ops.mean(pow_9, full_int_array_1, True) + del pow_9 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_17 = paddle._C_ops.scale(mean_9, full_3, float("1e-06"), True) + del mean_9 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_9 = paddle._C_ops.rsqrt(scale_17) + del scale_17 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_18 = paddle._C_ops.multiply(add_15, rsqrt_9) + del rsqrt_9 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_19 = paddle._C_ops.multiply(parameter_89, multiply_18) + del multiply_18, parameter_89 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_39 = paddle._C_ops.matmul(multiply_19, parameter_91, False, False) + del multiply_19, parameter_91 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_4 = paddle._C_ops.relu(matmul_39) + del matmul_39 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_4, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_4 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_40 = paddle._C_ops.matmul(dropout_38, parameter_90, False, False) + del dropout_38, parameter_90 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_40, None, 
full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_40 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_16 = paddle._C_ops.add(dropout_40, add_15) + del add_15, dropout_40 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_10 = paddle._C_ops.pow(add_16, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_10 = paddle._C_ops.mean(pow_10, full_int_array_1, True) + del pow_10 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_18 = paddle._C_ops.scale(mean_10, full_3, float("1e-06"), True) + del mean_10 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_10 = paddle._C_ops.rsqrt(scale_18) + del scale_18 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_20 = paddle._C_ops.multiply(add_16, rsqrt_10) + del rsqrt_10 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_21 = paddle._C_ops.multiply(parameter_84, multiply_20) + del multiply_20, parameter_84 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_41 = paddle._C_ops.matmul(multiply_21, parameter_88, False, False) + del parameter_88 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(matmul_41, full_int_array_2) + del matmul_41 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_42 = paddle._C_ops.matmul(multiply_21, parameter_87, False, False) + del parameter_87 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_42, full_int_array_2) + del matmul_42 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.matmul: (1x20x512xf32) <- 
(1x20x512xf32, 512x512xf32) + matmul_43 = paddle._C_ops.matmul(multiply_21, parameter_86, False, False) + del multiply_21, parameter_86 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_43, full_int_array_2) + del matmul_43 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_23 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.matmul: (1x8x20x20xf32) <- (1x8x20x64xf32, 1x8x20x64xf32) + matmul_44 = paddle._C_ops.matmul(transpose_21, transpose_22, False, True) + del transpose_21, transpose_22 + + # pd_op.add: (1x8x20x20xf32) <- (1x8x20x20xf32, 1x8x20x20xf32) + add_17 = paddle._C_ops.add(matmul_44, add_1) + del add_1, matmul_44 + + # pd_op.softmax: (1x8x20x20xf32) <- (1x8x20x20xf32) + softmax_5 = paddle._C_ops.softmax(add_17, -1) + del add_17 + + # pd_op.dropout: (1x8x20x20xf32, 1x8x20x20xui8) <- (1x8x20x20xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x8x20x64xf32) <- (1x8x20x20xf32, 1x8x20x64xf32) + matmul_45 = paddle._C_ops.matmul(dropout_42, transpose_23, False, False) + del dropout_42, transpose_23 + + # pd_op.transpose: (1x20x8x64xf32) <- (1x8x20x64xf32) + transpose_24 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x20x512xf32) <- (1x20x8x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_24, full_int_array_4) + del transpose_24 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_85, False, False) + del parameter_85, reshape_23 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_46, None, full_2, True, 
"upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_46 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_18 = paddle._C_ops.add(add_16, dropout_44) + del add_16, dropout_44 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_11 = paddle._C_ops.pow(add_18, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_11 = paddle._C_ops.mean(pow_11, full_int_array_1, True) + del pow_11 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_19 = paddle._C_ops.scale(mean_11, full_3, float("1e-06"), True) + del mean_11 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_11 = paddle._C_ops.rsqrt(scale_19) + del scale_19 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_22 = paddle._C_ops.multiply(add_18, rsqrt_11) + del rsqrt_11 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_23 = paddle._C_ops.multiply(parameter_81, multiply_22) + del multiply_22, parameter_81 + + # pd_op.matmul: (1x20x2048xf32) <- (1x20x512xf32, 512x2048xf32) + matmul_47 = paddle._C_ops.matmul(multiply_23, parameter_83, False, False) + del multiply_23, parameter_83 + + # pd_op.relu: (1x20x2048xf32) <- (1x20x2048xf32) + relu_5 = paddle._C_ops.relu(matmul_47) + del matmul_47 + + # pd_op.dropout: (1x20x2048xf32, 1x20x2048xui8) <- (1x20x2048xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_5, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_5 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x2048xf32, 2048x512xf32) + matmul_48 = paddle._C_ops.matmul(dropout_46, parameter_82, False, False) + del dropout_46, parameter_82 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_48, None, full_2, True, 
"upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_48 + + # pd_op.add: (1x20x512xf32) <- (1x20x512xf32, 1x20x512xf32) + add_19 = paddle._C_ops.add(dropout_48, add_18) + del add_18, dropout_48 + + # pd_op.pow: (1x20x512xf32) <- (1x20x512xf32) + pow_12 = paddle._C_ops.pow(add_19, float("2")) + + # pd_op.mean: (1x20x1xf32) <- (1x20x512xf32, 1xi64) + mean_12 = paddle._C_ops.mean(pow_12, full_int_array_1, True) + del pow_12 + + # pd_op.scale: (1x20x1xf32) <- (1x20x1xf32, 1xf32) + scale_20 = paddle._C_ops.scale(mean_12, full_3, float("1e-06"), True) + del mean_12 + + # pd_op.rsqrt: (1x20x1xf32) <- (1x20x1xf32) + rsqrt_12 = paddle._C_ops.rsqrt(scale_20) + del scale_20 + + # pd_op.multiply: (1x20x512xf32) <- (1x20x512xf32, 1x20x1xf32) + multiply_24 = paddle._C_ops.multiply(add_19, rsqrt_12) + del add_19, rsqrt_12 + + # pd_op.multiply: (1x20x512xf32) <- (512xf32, 1x20x512xf32) + multiply_25 = paddle._C_ops.multiply(parameter_80, multiply_24) + del multiply_24, parameter_80 + + # pd_op.dropout: (1x20x512xf32, 1x20x512xui8) <- (1x20x512xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + multiply_25, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del multiply_25 + + # pd_op.embedding: (1x1x512xf32) <- (1x1xi64, 32128x512xf32) + embedding_2 = paddle._C_ops.embedding(data_2, parameter_130, -1, False) + del data_2 + + # pd_op.full: (1x1xf32) <- () + full_14 = paddle._C_ops.full( + [1, 1], + float("1"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.arange: (1xi64) <- (1xf64, 1xf64, 1xf64) + arange_1 = paddle.arange(full_4, full_6, full_6, dtype="int64") + del full_4, full_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [0, 1] + + # pd_op.unsqueeze: (1x1x1xi64) <- (1xi64, 2xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(arange_1, 
full_int_array_5) + del full_int_array_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_6 = [1, 1, 1] + + # pd_op.tile: (1x1x1xi64) <- (1x1x1xi64, 3xi64) + tile_0 = paddle._C_ops.tile(unsqueeze_4, full_int_array_6) + del full_int_array_6, unsqueeze_4 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_7 = [0, 2] + + # pd_op.unsqueeze: (1x1x1xi64) <- (1xi64, 2xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(arange_1, full_int_array_7) + del full_int_array_7 + + # pd_op.less_equal: (1x1x1xb) <- (1x1x1xi64, 1x1x1xi64) + less_equal_0 = paddle._C_ops.less_equal(tile_0, unsqueeze_5) + del tile_0, unsqueeze_5 + + # pd_op.cast: (1x1x1xf32) <- (1x1x1xb) + cast_4 = paddle._C_ops.cast(less_equal_0, paddle.float32) + del less_equal_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [1] + + # pd_op.unsqueeze: (1x1x1x1xf32) <- (1x1x1xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(cast_4, full_int_array_8) + del cast_4, full_int_array_8 + + # pd_op.unsqueeze: (1x1x1x1xf32) <- (1x1xf32, 2xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(full_14, full_int_array_0) + del full_14, full_int_array_0 + + # pd_op.multiply: (1x1x1x1xf32) <- (1x1x1x1xf32, 1x1x1x1xf32) + multiply_26 = paddle._C_ops.multiply(unsqueeze_6, unsqueeze_7) + del unsqueeze_6, unsqueeze_7 + + # pd_op.scale: (1x1x1x1xf32) <- (1x1x1x1xf32, 1xf32) + scale_21 = paddle._C_ops.scale(multiply_26, full_0, float("1"), True) + del multiply_26 + + # pd_op.scale: (1x1x1x1xf32) <- (1x1x1x1xf32, 1xf32) + scale_22 = paddle._C_ops.scale(scale_21, full_1, float("0"), True) + del full_1, scale_21 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + embedding_2, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del embedding_2 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_13 = paddle._C_ops.pow(dropout_52, float("2")) + 
+ # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_13 = paddle._C_ops.mean(pow_13, full_int_array_1, True) + del pow_13 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_23 = paddle._C_ops.scale(mean_13, full_3, float("1e-06"), True) + del mean_13 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_13 = paddle._C_ops.rsqrt(scale_23) + del scale_23 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_27 = paddle._C_ops.multiply(dropout_52, rsqrt_13) + del rsqrt_13 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_28 = paddle._C_ops.multiply(parameter_74, multiply_27) + del multiply_27, parameter_74 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_49 = paddle._C_ops.matmul(multiply_28, parameter_79, False, False) + del parameter_79 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_49, full_int_array_2) + del matmul_49 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_50 = paddle._C_ops.matmul(multiply_28, parameter_78, False, False) + del parameter_78 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(matmul_50, full_int_array_2) + del matmul_50 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_51 = paddle._C_ops.matmul(multiply_28, parameter_77, False, False) + del multiply_28, parameter_77 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(matmul_51, full_int_array_2) + del matmul_51 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_27 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del 
reshape_26 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_52 = paddle._C_ops.matmul(transpose_25, transpose_26, False, True) + del transpose_25 + + # pd_op.unsqueeze: (1x1xi64) <- (1xi64, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(arange_1, full_int_array_1) + + # pd_op.unsqueeze: (1x1xi64) <- (1xi64, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(arange_1, full_int_array_3) + del arange_1 + + # pd_op.subtract: (1x1xi64) <- (1x1xi64, 1x1xi64) + subtract_1 = paddle._C_ops.subtract(unsqueeze_9, unsqueeze_8) + del unsqueeze_8, unsqueeze_9 + + # pd_op.full: (1xf32) <- () + full_15 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x1xi64) <- (1x1xi64, 1xf32) + full_like_1 = paddle._C_ops.full_like( + subtract_1, + full_15, + paddle.int64, + paddle.framework._current_expected_place(), + ) + del full_15 + + # pd_op.minimum: (1x1xi64) <- (1x1xi64, 1x1xi64) + minimum_1 = paddle._C_ops.minimum(subtract_1, full_like_1) + del full_like_1, subtract_1 + + # pd_op.scale: (1x1xi64) <- (1x1xi64, 1xf32) + scale_24 = paddle._C_ops.scale(minimum_1, full_0, float("0"), True) + del full_0, minimum_1 + + # pd_op.full: (xi64) <- () + full_16 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.less_than: (1x1xb) <- (1x1xi64, xi64) + less_than_1 = paddle._C_ops.less_than(scale_24, full_16) + del full_16 + + # pd_op.cast: (1x1xf32) <- (1x1xi64) + cast_5 = paddle._C_ops.cast(scale_24, paddle.float32) + + # pd_op.full: (1xf32) <- () + full_17 = paddle._C_ops.full( + [1], float("0.0625"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1xf32) <- (1x1xf32, 1xf32) + scale_25 = paddle._C_ops.scale(cast_5, full_17, float("0"), True) + del cast_5, full_17 + + # pd_op.log: (1x1xf32) <- (1x1xf32) + log_1 = paddle._C_ops.log(scale_25) + del scale_25 + + # pd_op.full: (1xf32) <- () + full_18 = paddle._C_ops.full( + [1], 
float("0.480898"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1xf32) <- (1x1xf32, 1xf32) + scale_26 = paddle._C_ops.scale(log_1, full_18, float("0"), True) + del full_18, log_1 + + # pd_op.scale: (1x1xf32) <- (1x1xf32, 1xf32) + scale_27 = paddle._C_ops.scale(scale_26, full_8, float("0"), True) + del full_8, scale_26 + + # pd_op.cast: (1x1xi64) <- (1x1xf32) + cast_6 = paddle._C_ops.cast(scale_27, paddle.int64) + del scale_27 + + # pd_op.scale: (1x1xi64) <- (1x1xi64, 1xf32) + scale_28 = paddle._C_ops.scale(cast_6, full_3, float("16"), True) + del cast_6 + + # pd_op.full: (1xf32) <- () + full_19 = paddle._C_ops.full( + [1], float("31"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x1xi64) <- (1x1xi64, 1xf32) + full_like_2 = paddle._C_ops.full_like( + scale_28, full_19, paddle.int64, paddle.framework._current_expected_place() + ) + del full_19 + + # pd_op.minimum: (1x1xi64) <- (1x1xi64, 1x1xi64) + minimum_2 = paddle._C_ops.minimum(scale_28, full_like_2) + del full_like_2, scale_28 + + # pd_op.where: (1x1xi64) <- (1x1xb, 1x1xi64, 1x1xi64) + where_1 = paddle._C_ops.where(less_than_1, scale_24, minimum_2) + del less_than_1, minimum_2, scale_24 + + # pd_op.scale: (1x1xi64) <- (1x1xi64, 1xf32) + scale_29 = paddle._C_ops.scale(where_1, full_3, float("0"), True) + del where_1 + + # pd_op.embedding: (1x1x8xf32) <- (1x1xi64, 32x8xf32) + embedding_3 = paddle._C_ops.embedding(scale_29, parameter_75, -1, False) + del parameter_75, scale_29 + + # pd_op.transpose: (8x1x1xf32) <- (1x1x8xf32) + transpose_28 = paddle._C_ops.transpose(embedding_3, [2, 0, 1]) + del embedding_3 + + # pd_op.unsqueeze: (1x8x1x1xf32) <- (8x1x1xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_3) + del full_int_array_3, transpose_28 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x1x1x1xf32) + add_20 = paddle._C_ops.add(unsqueeze_10, scale_22) + del scale_22, unsqueeze_10 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) 
+ add_21 = paddle._C_ops.add(matmul_52, add_20) + del matmul_52 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_6 = paddle._C_ops.softmax(add_21, -1) + del add_21 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_53 = paddle._C_ops.matmul(dropout_54, transpose_27, False, False) + del dropout_54 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_29 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_29, full_int_array_4) + del transpose_29 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_76, False, False) + del parameter_76, reshape_27 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_54, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_54 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_22 = paddle._C_ops.add(dropout_52, dropout_56) + del dropout_52, dropout_56 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_14 = paddle._C_ops.pow(add_22, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_14 = paddle._C_ops.mean(pow_14, full_int_array_1, True) + del pow_14 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_30 = paddle._C_ops.scale(mean_14, full_3, float("1e-06"), True) + del mean_14 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_14 = 
paddle._C_ops.rsqrt(scale_30) + del scale_30 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_29 = paddle._C_ops.multiply(add_22, rsqrt_14) + del rsqrt_14 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_30 = paddle._C_ops.multiply(parameter_69, multiply_29) + del multiply_29, parameter_69 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_55 = paddle._C_ops.matmul(multiply_30, parameter_73, False, False) + del multiply_30, parameter_73 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_55, full_int_array_2) + del matmul_55 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_56 = paddle._C_ops.matmul(dropout_50, parameter_72, False, False) + del parameter_72 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_56, full_int_array_2) + del matmul_56 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_31 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_57 = paddle._C_ops.matmul(dropout_50, parameter_71, False, False) + del parameter_71 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_57, full_int_array_2) + del matmul_57 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_58 = paddle._C_ops.matmul(transpose_30, transpose_31, False, True) + del transpose_30 + + # pd_op.full: (1x8x1x20xf32) <- () + full_20 = paddle._C_ops.full( + [1, 8, 1, 20], + float("0"), + paddle.float32, + 
paddle.framework._current_expected_place(), + ) + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x1x1x20xf32) + add_23 = paddle._C_ops.add(full_20, scale_1) + del full_20, scale_1 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_24 = paddle._C_ops.add(matmul_58, add_23) + del matmul_58 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_7 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_59 = paddle._C_ops.matmul(dropout_58, transpose_32, False, False) + del dropout_58 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_33 = paddle._C_ops.transpose(matmul_59, [0, 2, 1, 3]) + del matmul_59 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_33, full_int_array_4) + del transpose_33 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_60 = paddle._C_ops.matmul(reshape_31, parameter_70, False, False) + del parameter_70, reshape_31 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_60, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_60 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_25 = paddle._C_ops.add(add_22, dropout_60) + del add_22, dropout_60 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_15 = paddle._C_ops.pow(add_25, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_15 = paddle._C_ops.mean(pow_15, 
full_int_array_1, True) + del pow_15 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_31 = paddle._C_ops.scale(mean_15, full_3, float("1e-06"), True) + del mean_15 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_15 = paddle._C_ops.rsqrt(scale_31) + del scale_31 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_31 = paddle._C_ops.multiply(add_25, rsqrt_15) + del rsqrt_15 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_32 = paddle._C_ops.multiply(parameter_66, multiply_31) + del multiply_31, parameter_66 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_61 = paddle._C_ops.matmul(multiply_32, parameter_68, False, False) + del multiply_32, parameter_68 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_6 = paddle._C_ops.relu(matmul_61) + del matmul_61 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_6, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_6 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_62 = paddle._C_ops.matmul(dropout_62, parameter_67, False, False) + del dropout_62, parameter_67 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_62, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_62 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_26 = paddle._C_ops.add(dropout_64, add_25) + del add_25, dropout_64 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_16 = paddle._C_ops.pow(add_26, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_16 = paddle._C_ops.mean(pow_16, full_int_array_1, True) + del 
pow_16 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_32 = paddle._C_ops.scale(mean_16, full_3, float("1e-06"), True) + del mean_16 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_16 = paddle._C_ops.rsqrt(scale_32) + del scale_32 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_33 = paddle._C_ops.multiply(add_26, rsqrt_16) + del rsqrt_16 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_34 = paddle._C_ops.multiply(parameter_61, multiply_33) + del multiply_33, parameter_61 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_63 = paddle._C_ops.matmul(multiply_34, parameter_65, False, False) + del parameter_65 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(matmul_63, full_int_array_2) + del matmul_63 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_64 = paddle._C_ops.matmul(multiply_34, parameter_64, False, False) + del parameter_64 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(matmul_64, full_int_array_2) + del matmul_64 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_35 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_65 = paddle._C_ops.matmul(multiply_34, parameter_63, False, False) + del multiply_34, parameter_63 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(matmul_65, full_int_array_2) + del matmul_65 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_66 = paddle._C_ops.matmul(transpose_34, 
transpose_35, False, True) + del transpose_34 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_27 = paddle._C_ops.add(matmul_66, add_20) + del matmul_66 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_8 = paddle._C_ops.softmax(add_27, -1) + del add_27 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_67 = paddle._C_ops.matmul(dropout_66, transpose_36, False, False) + del dropout_66 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_37 = paddle._C_ops.transpose(matmul_67, [0, 2, 1, 3]) + del matmul_67 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_37, full_int_array_4) + del transpose_37 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_68 = paddle._C_ops.matmul(reshape_35, parameter_62, False, False) + del parameter_62, reshape_35 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_68, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_68 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_28 = paddle._C_ops.add(add_26, dropout_68) + del add_26, dropout_68 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_17 = paddle._C_ops.pow(add_28, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_17 = paddle._C_ops.mean(pow_17, full_int_array_1, True) + del pow_17 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_33 = paddle._C_ops.scale(mean_17, full_3, float("1e-06"), 
True) + del mean_17 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_17 = paddle._C_ops.rsqrt(scale_33) + del scale_33 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_35 = paddle._C_ops.multiply(add_28, rsqrt_17) + del rsqrt_17 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_36 = paddle._C_ops.multiply(parameter_56, multiply_35) + del multiply_35, parameter_56 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_69 = paddle._C_ops.matmul(multiply_36, parameter_60, False, False) + del multiply_36, parameter_60 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_69, full_int_array_2) + del matmul_69 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_70 = paddle._C_ops.matmul(dropout_50, parameter_59, False, False) + del parameter_59 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_70, full_int_array_2) + del matmul_70 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_39 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_71 = paddle._C_ops.matmul(dropout_50, parameter_58, False, False) + del parameter_58 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_71, full_int_array_2) + del matmul_71 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_72 = paddle._C_ops.matmul(transpose_38, transpose_39, False, True) + del transpose_38 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + 
add_29 = paddle._C_ops.add(matmul_72, add_23) + del matmul_72 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_9 = paddle._C_ops.softmax(add_29, -1) + del add_29 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_73 = paddle._C_ops.matmul(dropout_70, transpose_40, False, False) + del dropout_70 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_41 = paddle._C_ops.transpose(matmul_73, [0, 2, 1, 3]) + del matmul_73 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_41, full_int_array_4) + del transpose_41 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_74 = paddle._C_ops.matmul(reshape_39, parameter_57, False, False) + del parameter_57, reshape_39 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_74, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_74 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_30 = paddle._C_ops.add(add_28, dropout_72) + del add_28, dropout_72 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_18 = paddle._C_ops.pow(add_30, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_18 = paddle._C_ops.mean(pow_18, full_int_array_1, True) + del pow_18 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_34 = paddle._C_ops.scale(mean_18, full_3, float("1e-06"), True) + del mean_18 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_18 = 
paddle._C_ops.rsqrt(scale_34) + del scale_34 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_37 = paddle._C_ops.multiply(add_30, rsqrt_18) + del rsqrt_18 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_38 = paddle._C_ops.multiply(parameter_53, multiply_37) + del multiply_37, parameter_53 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_75 = paddle._C_ops.matmul(multiply_38, parameter_55, False, False) + del multiply_38, parameter_55 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_7 = paddle._C_ops.relu(matmul_75) + del matmul_75 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_7, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_7 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_76 = paddle._C_ops.matmul(dropout_74, parameter_54, False, False) + del dropout_74, parameter_54 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_76, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_76 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_31 = paddle._C_ops.add(dropout_76, add_30) + del add_30, dropout_76 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_19 = paddle._C_ops.pow(add_31, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_19 = paddle._C_ops.mean(pow_19, full_int_array_1, True) + del pow_19 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_35 = paddle._C_ops.scale(mean_19, full_3, float("1e-06"), True) + del mean_19 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_19 = paddle._C_ops.rsqrt(scale_35) + 
del scale_35 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_39 = paddle._C_ops.multiply(add_31, rsqrt_19) + del rsqrt_19 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_40 = paddle._C_ops.multiply(parameter_48, multiply_39) + del multiply_39, parameter_48 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_77 = paddle._C_ops.matmul(multiply_40, parameter_52, False, False) + del parameter_52 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(matmul_77, full_int_array_2) + del matmul_77 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_78 = paddle._C_ops.matmul(multiply_40, parameter_51, False, False) + del parameter_51 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(matmul_78, full_int_array_2) + del matmul_78 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_43 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_79 = paddle._C_ops.matmul(multiply_40, parameter_50, False, False) + del multiply_40, parameter_50 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_79, full_int_array_2) + del matmul_79 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_80 = paddle._C_ops.matmul(transpose_42, transpose_43, False, True) + del transpose_42 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_32 = paddle._C_ops.add(matmul_80, add_20) + del matmul_80 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_10 = 
paddle._C_ops.softmax(add_32, -1) + del add_32 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_81 = paddle._C_ops.matmul(dropout_78, transpose_44, False, False) + del dropout_78 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_45 = paddle._C_ops.transpose(matmul_81, [0, 2, 1, 3]) + del matmul_81 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_45, full_int_array_4) + del transpose_45 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_82 = paddle._C_ops.matmul(reshape_43, parameter_49, False, False) + del parameter_49, reshape_43 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_82, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_82 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_33 = paddle._C_ops.add(add_31, dropout_80) + del add_31, dropout_80 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_20 = paddle._C_ops.pow(add_33, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_20 = paddle._C_ops.mean(pow_20, full_int_array_1, True) + del pow_20 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_36 = paddle._C_ops.scale(mean_20, full_3, float("1e-06"), True) + del mean_20 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_20 = paddle._C_ops.rsqrt(scale_36) + del scale_36 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_41 = paddle._C_ops.multiply(add_33, 
rsqrt_20) + del rsqrt_20 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_42 = paddle._C_ops.multiply(parameter_43, multiply_41) + del multiply_41, parameter_43 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_83 = paddle._C_ops.matmul(multiply_42, parameter_47, False, False) + del multiply_42, parameter_47 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_83, full_int_array_2) + del matmul_83 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_84 = paddle._C_ops.matmul(dropout_50, parameter_46, False, False) + del parameter_46 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_84, full_int_array_2) + del matmul_84 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_47 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_85 = paddle._C_ops.matmul(dropout_50, parameter_45, False, False) + del parameter_45 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(matmul_85, full_int_array_2) + del matmul_85 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_48 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_86 = paddle._C_ops.matmul(transpose_46, transpose_47, False, True) + del transpose_46 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_34 = paddle._C_ops.add(matmul_86, add_23) + del matmul_86 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_11 = paddle._C_ops.softmax(add_34, -1) + del add_34 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- 
(1x8x1x20xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_87 = paddle._C_ops.matmul(dropout_82, transpose_48, False, False) + del dropout_82 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_49 = paddle._C_ops.transpose(matmul_87, [0, 2, 1, 3]) + del matmul_87 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_49, full_int_array_4) + del transpose_49 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_88 = paddle._C_ops.matmul(reshape_47, parameter_44, False, False) + del parameter_44, reshape_47 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_88, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_88 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_35 = paddle._C_ops.add(add_33, dropout_84) + del add_33, dropout_84 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_21 = paddle._C_ops.pow(add_35, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_21 = paddle._C_ops.mean(pow_21, full_int_array_1, True) + del pow_21 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_37 = paddle._C_ops.scale(mean_21, full_3, float("1e-06"), True) + del mean_21 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_21 = paddle._C_ops.rsqrt(scale_37) + del scale_37 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_43 = paddle._C_ops.multiply(add_35, rsqrt_21) + del rsqrt_21 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + 
multiply_44 = paddle._C_ops.multiply(parameter_40, multiply_43) + del multiply_43, parameter_40 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_89 = paddle._C_ops.matmul(multiply_44, parameter_42, False, False) + del multiply_44, parameter_42 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_8 = paddle._C_ops.relu(matmul_89) + del matmul_89 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_8, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_8 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_90 = paddle._C_ops.matmul(dropout_86, parameter_41, False, False) + del dropout_86, parameter_41 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_90, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_90 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_36 = paddle._C_ops.add(dropout_88, add_35) + del add_35, dropout_88 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_22 = paddle._C_ops.pow(add_36, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_22 = paddle._C_ops.mean(pow_22, full_int_array_1, True) + del pow_22 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_38 = paddle._C_ops.scale(mean_22, full_3, float("1e-06"), True) + del mean_22 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_22 = paddle._C_ops.rsqrt(scale_38) + del scale_38 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_45 = paddle._C_ops.multiply(add_36, rsqrt_22) + del rsqrt_22 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_46 = 
paddle._C_ops.multiply(parameter_35, multiply_45) + del multiply_45, parameter_35 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_91 = paddle._C_ops.matmul(multiply_46, parameter_39, False, False) + del parameter_39 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(matmul_91, full_int_array_2) + del matmul_91 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_50 = paddle._C_ops.transpose(reshape_48, [0, 2, 1, 3]) + del reshape_48 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_92 = paddle._C_ops.matmul(multiply_46, parameter_38, False, False) + del parameter_38 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(matmul_92, full_int_array_2) + del matmul_92 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_51 = paddle._C_ops.transpose(reshape_49, [0, 2, 1, 3]) + del reshape_49 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_93 = paddle._C_ops.matmul(multiply_46, parameter_37, False, False) + del multiply_46, parameter_37 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(matmul_93, full_int_array_2) + del matmul_93 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_52 = paddle._C_ops.transpose(reshape_50, [0, 2, 1, 3]) + del reshape_50 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_94 = paddle._C_ops.matmul(transpose_50, transpose_51, False, True) + del transpose_50 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_37 = paddle._C_ops.add(matmul_94, add_20) + del matmul_94 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_12 = paddle._C_ops.softmax(add_37, -1) + del add_37 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_12, None, full_2, 
True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_12 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_95 = paddle._C_ops.matmul(dropout_90, transpose_52, False, False) + del dropout_90 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_53 = paddle._C_ops.transpose(matmul_95, [0, 2, 1, 3]) + del matmul_95 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_51 = paddle._C_ops.reshape(transpose_53, full_int_array_4) + del transpose_53 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_96 = paddle._C_ops.matmul(reshape_51, parameter_36, False, False) + del parameter_36, reshape_51 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_96, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_96 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_38 = paddle._C_ops.add(add_36, dropout_92) + del add_36, dropout_92 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_23 = paddle._C_ops.pow(add_38, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_23 = paddle._C_ops.mean(pow_23, full_int_array_1, True) + del pow_23 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_39 = paddle._C_ops.scale(mean_23, full_3, float("1e-06"), True) + del mean_23 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_23 = paddle._C_ops.rsqrt(scale_39) + del scale_39 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_47 = paddle._C_ops.multiply(add_38, rsqrt_23) + del rsqrt_23 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_48 = paddle._C_ops.multiply(parameter_30, multiply_47) + del multiply_47, parameter_30 + + # pd_op.matmul: (1x1x512xf32) <- 
(1x1x512xf32, 512x512xf32) + matmul_97 = paddle._C_ops.matmul(multiply_48, parameter_34, False, False) + del multiply_48, parameter_34 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(matmul_97, full_int_array_2) + del matmul_97 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_54 = paddle._C_ops.transpose(reshape_52, [0, 2, 1, 3]) + del reshape_52 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_98 = paddle._C_ops.matmul(dropout_50, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(matmul_98, full_int_array_2) + del matmul_98 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_55 = paddle._C_ops.transpose(reshape_53, [0, 2, 1, 3]) + del reshape_53 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_99 = paddle._C_ops.matmul(dropout_50, parameter_32, False, False) + del parameter_32 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(matmul_99, full_int_array_2) + del matmul_99 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_56 = paddle._C_ops.transpose(reshape_54, [0, 2, 1, 3]) + del reshape_54 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_100 = paddle._C_ops.matmul(transpose_54, transpose_55, False, True) + del transpose_54 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_39 = paddle._C_ops.add(matmul_100, add_23) + del matmul_100 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_13 = paddle._C_ops.softmax(add_39, -1) + del add_39 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_13, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else 
(out, None), + ) + del softmax_13 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_101 = paddle._C_ops.matmul(dropout_94, transpose_56, False, False) + del dropout_94 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_57 = paddle._C_ops.transpose(matmul_101, [0, 2, 1, 3]) + del matmul_101 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(transpose_57, full_int_array_4) + del transpose_57 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_102 = paddle._C_ops.matmul(reshape_55, parameter_31, False, False) + del parameter_31, reshape_55 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_102, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_102 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_40 = paddle._C_ops.add(add_38, dropout_96) + del add_38, dropout_96 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_24 = paddle._C_ops.pow(add_40, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_24 = paddle._C_ops.mean(pow_24, full_int_array_1, True) + del pow_24 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_40 = paddle._C_ops.scale(mean_24, full_3, float("1e-06"), True) + del mean_24 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_24 = paddle._C_ops.rsqrt(scale_40) + del scale_40 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_49 = paddle._C_ops.multiply(add_40, rsqrt_24) + del rsqrt_24 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_50 = paddle._C_ops.multiply(parameter_27, multiply_49) + del multiply_49, parameter_27 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_103 = paddle._C_ops.matmul(multiply_50, parameter_29, 
False, False) + del multiply_50, parameter_29 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_9 = paddle._C_ops.relu(matmul_103) + del matmul_103 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_9, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_9 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_104 = paddle._C_ops.matmul(dropout_98, parameter_28, False, False) + del dropout_98, parameter_28 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_104, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_104 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_41 = paddle._C_ops.add(dropout_100, add_40) + del add_40, dropout_100 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_25 = paddle._C_ops.pow(add_41, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_25 = paddle._C_ops.mean(pow_25, full_int_array_1, True) + del pow_25 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_41 = paddle._C_ops.scale(mean_25, full_3, float("1e-06"), True) + del mean_25 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_25 = paddle._C_ops.rsqrt(scale_41) + del scale_41 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_51 = paddle._C_ops.multiply(add_41, rsqrt_25) + del rsqrt_25 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_52 = paddle._C_ops.multiply(parameter_22, multiply_51) + del multiply_51, parameter_22 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_105 = paddle._C_ops.matmul(multiply_52, parameter_26, False, False) + del 
parameter_26 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(matmul_105, full_int_array_2) + del matmul_105 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_58 = paddle._C_ops.transpose(reshape_56, [0, 2, 1, 3]) + del reshape_56 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_106 = paddle._C_ops.matmul(multiply_52, parameter_25, False, False) + del parameter_25 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(matmul_106, full_int_array_2) + del matmul_106 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_59 = paddle._C_ops.transpose(reshape_57, [0, 2, 1, 3]) + del reshape_57 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_107 = paddle._C_ops.matmul(multiply_52, parameter_24, False, False) + del multiply_52, parameter_24 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(matmul_107, full_int_array_2) + del matmul_107 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_60 = paddle._C_ops.transpose(reshape_58, [0, 2, 1, 3]) + del reshape_58 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_108 = paddle._C_ops.matmul(transpose_58, transpose_59, False, True) + del transpose_58 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_42 = paddle._C_ops.add(matmul_108, add_20) + del matmul_108 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_14 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_102, dropout_103 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_14, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_14 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_109 = 
paddle._C_ops.matmul(dropout_102, transpose_60, False, False) + del dropout_102 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_61 = paddle._C_ops.transpose(matmul_109, [0, 2, 1, 3]) + del matmul_109 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_59 = paddle._C_ops.reshape(transpose_61, full_int_array_4) + del transpose_61 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_110 = paddle._C_ops.matmul(reshape_59, parameter_23, False, False) + del parameter_23, reshape_59 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_104, dropout_105 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_110, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_110 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_43 = paddle._C_ops.add(add_41, dropout_104) + del add_41, dropout_104 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_26 = paddle._C_ops.pow(add_43, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_26 = paddle._C_ops.mean(pow_26, full_int_array_1, True) + del pow_26 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_42 = paddle._C_ops.scale(mean_26, full_3, float("1e-06"), True) + del mean_26 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_26 = paddle._C_ops.rsqrt(scale_42) + del scale_42 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_53 = paddle._C_ops.multiply(add_43, rsqrt_26) + del rsqrt_26 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_54 = paddle._C_ops.multiply(parameter_17, multiply_53) + del multiply_53, parameter_17 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_111 = paddle._C_ops.matmul(multiply_54, parameter_21, False, False) + del multiply_54, parameter_21 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_60 
= paddle._C_ops.reshape(matmul_111, full_int_array_2) + del matmul_111 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_62 = paddle._C_ops.transpose(reshape_60, [0, 2, 1, 3]) + del reshape_60 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_112 = paddle._C_ops.matmul(dropout_50, parameter_20, False, False) + del parameter_20 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(matmul_112, full_int_array_2) + del matmul_112 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_63 = paddle._C_ops.transpose(reshape_61, [0, 2, 1, 3]) + del reshape_61 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_113 = paddle._C_ops.matmul(dropout_50, parameter_19, False, False) + del parameter_19 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(matmul_113, full_int_array_2) + del matmul_113 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_64 = paddle._C_ops.transpose(reshape_62, [0, 2, 1, 3]) + del reshape_62 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_114 = paddle._C_ops.matmul(transpose_62, transpose_63, False, True) + del transpose_62 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_44 = paddle._C_ops.add(matmul_114, add_23) + del matmul_114 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_15 = paddle._C_ops.softmax(add_44, -1) + del add_44 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_106, dropout_107 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_15, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_15 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_115 = paddle._C_ops.matmul(dropout_106, transpose_64, False, False) + del dropout_106 + 
+ # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_65 = paddle._C_ops.transpose(matmul_115, [0, 2, 1, 3]) + del matmul_115 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(transpose_65, full_int_array_4) + del transpose_65 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_116 = paddle._C_ops.matmul(reshape_63, parameter_18, False, False) + del parameter_18, reshape_63 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_108, dropout_109 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_116, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_116 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_45 = paddle._C_ops.add(add_43, dropout_108) + del add_43, dropout_108 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_27 = paddle._C_ops.pow(add_45, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_27 = paddle._C_ops.mean(pow_27, full_int_array_1, True) + del pow_27 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_43 = paddle._C_ops.scale(mean_27, full_3, float("1e-06"), True) + del mean_27 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_27 = paddle._C_ops.rsqrt(scale_43) + del scale_43 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_55 = paddle._C_ops.multiply(add_45, rsqrt_27) + del rsqrt_27 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_56 = paddle._C_ops.multiply(parameter_14, multiply_55) + del multiply_55, parameter_14 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_117 = paddle._C_ops.matmul(multiply_56, parameter_16, False, False) + del multiply_56, parameter_16 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_10 = paddle._C_ops.relu(matmul_117) + del matmul_117 + + # pd_op.dropout: (1x1x2048xf32, 
1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_110, dropout_111 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + relu_10, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_10 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_118 = paddle._C_ops.matmul(dropout_110, parameter_15, False, False) + del dropout_110, parameter_15 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_112, dropout_113 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_118, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_118 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_46 = paddle._C_ops.add(dropout_112, add_45) + del add_45, dropout_112 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_28 = paddle._C_ops.pow(add_46, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_28 = paddle._C_ops.mean(pow_28, full_int_array_1, True) + del pow_28 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_44 = paddle._C_ops.scale(mean_28, full_3, float("1e-06"), True) + del mean_28 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_28 = paddle._C_ops.rsqrt(scale_44) + del scale_44 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_57 = paddle._C_ops.multiply(add_46, rsqrt_28) + del rsqrt_28 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_58 = paddle._C_ops.multiply(parameter_9, multiply_57) + del multiply_57, parameter_9 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_119 = paddle._C_ops.matmul(multiply_58, parameter_13, False, False) + del parameter_13 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(matmul_119, full_int_array_2) + del matmul_119 + + # pd_op.transpose: 
(1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_66 = paddle._C_ops.transpose(reshape_64, [0, 2, 1, 3]) + del reshape_64 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_120 = paddle._C_ops.matmul(multiply_58, parameter_12, False, False) + del parameter_12 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(matmul_120, full_int_array_2) + del matmul_120 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_67 = paddle._C_ops.transpose(reshape_65, [0, 2, 1, 3]) + del reshape_65 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_121 = paddle._C_ops.matmul(multiply_58, parameter_11, False, False) + del multiply_58, parameter_11 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(matmul_121, full_int_array_2) + del matmul_121 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_68 = paddle._C_ops.transpose(reshape_66, [0, 2, 1, 3]) + del reshape_66 + + # pd_op.matmul: (1x8x1x1xf32) <- (1x8x1x64xf32, 1x8x1x64xf32) + matmul_122 = paddle._C_ops.matmul(transpose_66, transpose_67, False, True) + del transpose_66 + + # pd_op.add: (1x8x1x1xf32) <- (1x8x1x1xf32, 1x8x1x1xf32) + add_47 = paddle._C_ops.add(matmul_122, add_20) + del add_20, matmul_122 + + # pd_op.softmax: (1x8x1x1xf32) <- (1x8x1x1xf32) + softmax_16 = paddle._C_ops.softmax(add_47, -1) + del add_47 + + # pd_op.dropout: (1x8x1x1xf32, 1x8x1x1xui8) <- (1x8x1x1xf32, None, 1xf32) + dropout_114, dropout_115 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_16, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_16 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x1xf32, 1x8x1x64xf32) + matmul_123 = paddle._C_ops.matmul(dropout_114, transpose_68, False, False) + del dropout_114 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_69 = 
paddle._C_ops.transpose(matmul_123, [0, 2, 1, 3]) + del matmul_123 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_67 = paddle._C_ops.reshape(transpose_69, full_int_array_4) + del transpose_69 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_124 = paddle._C_ops.matmul(reshape_67, parameter_10, False, False) + del parameter_10, reshape_67 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_116, dropout_117 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_124, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_124 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_48 = paddle._C_ops.add(add_46, dropout_116) + del add_46, dropout_116 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_29 = paddle._C_ops.pow(add_48, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_29 = paddle._C_ops.mean(pow_29, full_int_array_1, True) + del pow_29 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_45 = paddle._C_ops.scale(mean_29, full_3, float("1e-06"), True) + del mean_29 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_29 = paddle._C_ops.rsqrt(scale_45) + del scale_45 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_59 = paddle._C_ops.multiply(add_48, rsqrt_29) + del rsqrt_29 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_60 = paddle._C_ops.multiply(parameter_4, multiply_59) + del multiply_59, parameter_4 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_125 = paddle._C_ops.matmul(multiply_60, parameter_8, False, False) + del multiply_60, parameter_8 + + # pd_op.reshape: (1x1x8x64xf32) <- (1x1x512xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(matmul_125, full_int_array_2) + del matmul_125 + + # pd_op.transpose: (1x8x1x64xf32) <- (1x1x8x64xf32) + transpose_70 = 
paddle._C_ops.transpose(reshape_68, [0, 2, 1, 3]) + del reshape_68 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_126 = paddle._C_ops.matmul(dropout_50, parameter_7, False, False) + del parameter_7 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_69 = paddle._C_ops.reshape(matmul_126, full_int_array_2) + del matmul_126 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_71 = paddle._C_ops.transpose(reshape_69, [0, 2, 1, 3]) + del reshape_69 + + # pd_op.matmul: (1x20x512xf32) <- (1x20x512xf32, 512x512xf32) + matmul_127 = paddle._C_ops.matmul(dropout_50, parameter_6, False, False) + del parameter_6 + + # pd_op.reshape: (1x20x8x64xf32) <- (1x20x512xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(matmul_127, full_int_array_2) + del full_int_array_2, matmul_127 + + # pd_op.transpose: (1x8x20x64xf32) <- (1x20x8x64xf32) + transpose_72 = paddle._C_ops.transpose(reshape_70, [0, 2, 1, 3]) + del reshape_70 + + # pd_op.matmul: (1x8x1x20xf32) <- (1x8x1x64xf32, 1x8x20x64xf32) + matmul_128 = paddle._C_ops.matmul(transpose_70, transpose_71, False, True) + del transpose_70 + + # pd_op.add: (1x8x1x20xf32) <- (1x8x1x20xf32, 1x8x1x20xf32) + add_49 = paddle._C_ops.add(matmul_128, add_23) + del add_23, matmul_128 + + # pd_op.softmax: (1x8x1x20xf32) <- (1x8x1x20xf32) + softmax_17 = paddle._C_ops.softmax(add_49, -1) + del add_49 + + # pd_op.dropout: (1x8x1x20xf32, 1x8x1x20xui8) <- (1x8x1x20xf32, None, 1xf32) + dropout_118, dropout_119 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_17, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_17 + + # pd_op.matmul: (1x8x1x64xf32) <- (1x8x1x20xf32, 1x8x20x64xf32) + matmul_129 = paddle._C_ops.matmul(dropout_118, transpose_72, False, False) + del dropout_118 + + # pd_op.transpose: (1x1x8x64xf32) <- (1x8x1x64xf32) + transpose_73 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + 
del matmul_129 + + # pd_op.reshape: (1x1x512xf32) <- (1x1x8x64xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(transpose_73, full_int_array_4) + del full_int_array_4, transpose_73 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x512xf32, 512x512xf32) + matmul_130 = paddle._C_ops.matmul(reshape_71, parameter_5, False, False) + del parameter_5, reshape_71 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_120, dropout_121 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_130, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_130 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_50 = paddle._C_ops.add(add_48, dropout_120) + del add_48, dropout_120 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_30 = paddle._C_ops.pow(add_50, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_30 = paddle._C_ops.mean(pow_30, full_int_array_1, True) + del pow_30 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_46 = paddle._C_ops.scale(mean_30, full_3, float("1e-06"), True) + del mean_30 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_30 = paddle._C_ops.rsqrt(scale_46) + del scale_46 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_61 = paddle._C_ops.multiply(add_50, rsqrt_30) + del rsqrt_30 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_62 = paddle._C_ops.multiply(parameter_1, multiply_61) + del multiply_61, parameter_1 + + # pd_op.matmul: (1x1x2048xf32) <- (1x1x512xf32, 512x2048xf32) + matmul_131 = paddle._C_ops.matmul(multiply_62, parameter_3, False, False) + del multiply_62, parameter_3 + + # pd_op.relu: (1x1x2048xf32) <- (1x1x2048xf32) + relu_11 = paddle._C_ops.relu(matmul_131) + del matmul_131 + + # pd_op.dropout: (1x1x2048xf32, 1x1x2048xui8) <- (1x1x2048xf32, None, 1xf32) + dropout_122, dropout_123 = (lambda x, f: f(x))( + 
paddle._C_ops.dropout( + relu_11, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del relu_11 + + # pd_op.matmul: (1x1x512xf32) <- (1x1x2048xf32, 2048x512xf32) + matmul_132 = paddle._C_ops.matmul(dropout_122, parameter_2, False, False) + del dropout_122, parameter_2 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_124, dropout_125 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_132, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_132 + + # pd_op.add: (1x1x512xf32) <- (1x1x512xf32, 1x1x512xf32) + add_51 = paddle._C_ops.add(dropout_124, add_50) + del add_50, dropout_124 + + # pd_op.pow: (1x1x512xf32) <- (1x1x512xf32) + pow_31 = paddle._C_ops.pow(add_51, float("2")) + + # pd_op.mean: (1x1x1xf32) <- (1x1x512xf32, 1xi64) + mean_31 = paddle._C_ops.mean(pow_31, full_int_array_1, True) + del full_int_array_1, pow_31 + + # pd_op.scale: (1x1x1xf32) <- (1x1x1xf32, 1xf32) + scale_47 = paddle._C_ops.scale(mean_31, full_3, float("1e-06"), True) + del full_3, mean_31 + + # pd_op.rsqrt: (1x1x1xf32) <- (1x1x1xf32) + rsqrt_31 = paddle._C_ops.rsqrt(scale_47) + del scale_47 + + # pd_op.multiply: (1x1x512xf32) <- (1x1x512xf32, 1x1x1xf32) + multiply_63 = paddle._C_ops.multiply(add_51, rsqrt_31) + del add_51, rsqrt_31 + + # pd_op.multiply: (1x1x512xf32) <- (512xf32, 1x1x512xf32) + multiply_64 = paddle._C_ops.multiply(parameter_0, multiply_63) + del multiply_63, parameter_0 + + # pd_op.dropout: (1x1x512xf32, 1x1x512xui8) <- (1x1x512xf32, None, 1xf32) + dropout_126, dropout_127 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + multiply_64, None, full_2, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_2, multiply_64 + + # pd_op.full: (1xf32) <- () + full_21 = paddle._C_ops.full( + [1], 
float("0.0441942"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x1x512xf32) <- (1x1x512xf32, 1xf32) + scale_48 = paddle._C_ops.scale(dropout_126, full_21, float("0"), True) + del dropout_126, full_21 + + # pd_op.matmul: (1x1x32128xf32) <- (1x1x512xf32, 32128x512xf32) + matmul_0 = paddle._C_ops.matmul(scale_48, parameter_130, False, True) + del ( + dropout_50, + parameter_130, + scale_48, + transpose_26, + transpose_27, + transpose_31, + transpose_32, + transpose_35, + transpose_36, + transpose_39, + transpose_40, + transpose_43, + transpose_44, + transpose_47, + transpose_48, + transpose_51, + transpose_52, + transpose_55, + transpose_56, + transpose_59, + transpose_60, + transpose_63, + transpose_64, + transpose_67, + transpose_68, + transpose_71, + transpose_72, + ) + + return matmul_0 diff --git a/paddle_samples/PaddleNLP/t5-small/weight_meta.py b/paddle_samples/PaddleNLP/t5-small/weight_meta.py new file mode 100644 index 00000000..a5a523c2 --- /dev/null +++ b/paddle_samples/PaddleNLP/t5-small/weight_meta.py @@ -0,0 +1,1439 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [512] + dtype = "float32" + min_val = float("-0.000406265") + max_val = float("9.5625") + mean = float("0.238993") + std = float("0.528551") + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [512] + dtype = "float32" + min_val = float("-1.03125") + max_val = float("12.125") + mean = float("3.0563") + std = float("1.25819") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [2048, 512] + dtype = "float32" + min_val = float("-45.25") + max_val = float("24.5") + mean = float("0.000320316") + std = float("0.469186") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [512, 2048] + dtype = "float32" + min_val = float("-13.6875") + max_val = float("10.25") + mean = float("0.00696439") + std = float("0.82078") + data = None + + 
+class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [512] + dtype = "float32" + min_val = float("-0.164062") + max_val = float("1.52344") + mean = float("0.142325") + std = float("0.0974383") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [512, 512] + dtype = "float32" + min_val = float("-20.5") + max_val = float("19.25") + mean = float("-0.00232446") + std = float("1.47252") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [512, 512] + dtype = "float32" + min_val = float("-12.3125") + max_val = float("10.8125") + mean = float("-0.00313481") + std = float("1.79793") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.03125") + max_val = float("4.71875") + mean = float("0.000654718") + std = float("0.387374") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.233398") + max_val = float("0.21875") + mean = float("-3.2276e-05") + std = float("0.0427395") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [512] + dtype = "float32" + min_val = float("-0.341797") + max_val = float("0.789062") + mean = float("0.221493") + std = float("0.079524") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [512, 512] + dtype = "float32" + min_val = float("-42.75") + max_val = float("45.75") + mean = float("-0.00122536") + std = float("1.53928") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.53125") + max_val = float("5.96875") + mean = float("0.00298593") + std = float("0.953516") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [512, 512] + dtype = 
"float32" + min_val = float("-3.5") + max_val = float("2.32812") + mean = float("0.000342449") + std = float("0.394908") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.261719") + max_val = float("0.285156") + mean = float("0.000135268") + std = float("0.0427585") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [512] + dtype = "float32" + min_val = float("0.375") + max_val = float("7.65625") + mean = float("2.4388") + std = float("0.69339") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [2048, 512] + dtype = "float32" + min_val = float("-16.25") + max_val = float("10.5") + mean = float("-0.000124255") + std = float("0.419801") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [512, 2048] + dtype = "float32" + min_val = float("-10.625") + max_val = float("10.5") + mean = float("0.0074399") + std = float("0.694377") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [512] + dtype = "float32" + min_val = float("-0.0810547") + max_val = float("0.464844") + mean = float("0.0716004") + std = float("0.0325611") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [512, 512] + dtype = "float32" + min_val = float("-25.5") + max_val = float("21.0") + mean = float("0.000646527") + std = float("1.26897") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [512, 512] + dtype = "float32" + min_val = float("-8.25") + max_val = float("7.75") + mean = float("-0.00116448") + std = float("1.22014") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.65625") + max_val = float("6.34375") + mean = float("0.000291407") + std = 
float("0.478891") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.255859") + max_val = float("0.271484") + mean = float("-0.000160606") + std = float("0.0556473") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [512] + dtype = "float32" + min_val = float("-0.0688477") + max_val = float("0.53125") + mean = float("0.1733") + std = float("0.0362951") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [512, 512] + dtype = "float32" + min_val = float("-48.5") + max_val = float("37.5") + mean = float("-0.00320207") + std = float("2.31977") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.875") + max_val = float("4.40625") + mean = float("0.0014279") + std = float("0.891554") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.70312") + max_val = float("2.4375") + mean = float("-0.000709436") + std = float("0.410992") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.306641") + max_val = float("0.324219") + mean = float("-1.23639e-05") + std = float("0.0434743") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [512] + dtype = "float32" + min_val = float("0.332031") + max_val = float("4.71875") + mean = float("1.9234") + std = float("0.349066") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [2048, 512] + dtype = "float32" + min_val = float("-10.0625") + max_val = float("10.6875") + mean = float("-0.00030282") + std = float("0.399087") + data = None + + +class Program_weight_tensor_parameter_29: + name = 
"parameter_29" + shape = [512, 2048] + dtype = "float32" + min_val = float("-8.6875") + max_val = float("8.5625") + mean = float("0.0142616") + std = float("0.680091") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [512] + dtype = "float32" + min_val = float("-0.0791016") + max_val = float("0.435547") + mean = float("0.119905") + std = float("0.0375838") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [512, 512] + dtype = "float32" + min_val = float("-10.0625") + max_val = float("13.5625") + mean = float("9.86211e-05") + std = float("0.894709") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.0625") + max_val = float("6.59375") + mean = float("-0.00323875") + std = float("0.92144") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.65625") + max_val = float("4.5625") + mean = float("-0.00133923") + std = float("0.410349") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.279297") + max_val = float("0.263672") + mean = float("-2.56622e-05") + std = float("0.0439826") + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [512] + dtype = "float32" + min_val = float("-0.103516") + max_val = float("0.378906") + mean = float("0.154994") + std = float("0.0354193") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [512, 512] + dtype = "float32" + min_val = float("-21.75") + max_val = float("22.875") + mean = float("0.00349601") + std = float("1.43415") + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.0") + 
max_val = float("4.90625") + mean = float("-0.00221994") + std = float("0.958329") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.17188") + max_val = float("3.07812") + mean = float("-0.000142329") + std = float("0.418875") + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.339844") + max_val = float("0.439453") + mean = float("3.85278e-05") + std = float("0.0463164") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [512] + dtype = "float32" + min_val = float("0.310547") + max_val = float("3.09375") + mean = float("1.45117") + std = float("0.241399") + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [2048, 512] + dtype = "float32" + min_val = float("-14.1875") + max_val = float("9.0625") + mean = float("-0.000422012") + std = float("0.38754") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [512, 2048] + dtype = "float32" + min_val = float("-21.375") + max_val = float("18.0") + mean = float("0.019711") + std = float("0.736918") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [512] + dtype = "float32" + min_val = float("-0.0698242") + max_val = float("0.257812") + mean = float("0.0878949") + std = float("0.020732") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [512, 512] + dtype = "float32" + min_val = float("-9.3125") + max_val = float("9.375") + mean = float("-0.00120116") + std = float("0.733678") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.75") + max_val = float("3.73438") + mean = float("-0.000331075") + std = float("0.73921") + data = 
None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.8125") + max_val = float("5.46875") + mean = float("0.000311156") + std = float("0.392459") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.365234") + max_val = float("0.460938") + mean = float("-8.85896e-05") + std = float("0.0677922") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [512] + dtype = "float32" + min_val = float("-0.0869141") + max_val = float("0.304688") + mean = float("0.14316") + std = float("0.0355998") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [512, 512] + dtype = "float32" + min_val = float("-19.125") + max_val = float("23.375") + mean = float("0.000395767") + std = float("1.07214") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.40625") + max_val = float("4.71875") + mean = float("-0.00047506") + std = float("0.908469") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.25") + max_val = float("3.0625") + mean = float("-0.000608685") + std = float("0.406653") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.410156") + max_val = float("0.5") + mean = float("0.000177197") + std = float("0.0492003") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [512] + dtype = "float32" + min_val = float("0.304688") + max_val = float("2.26562") + mean = float("1.15984") + std = float("0.228704") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = 
[2048, 512] + dtype = "float32" + min_val = float("-15.8125") + max_val = float("14.625") + mean = float("-0.00204277") + std = float("0.358594") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [512, 2048] + dtype = "float32" + min_val = float("-18.25") + max_val = float("25.25") + mean = float("0.0252502") + std = float("0.713721") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [512] + dtype = "float32" + min_val = float("-0.0786133") + max_val = float("0.198242") + mean = float("0.0967789") + std = float("0.0251307") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [512, 512] + dtype = "float32" + min_val = float("-7.5") + max_val = float("6.96875") + mean = float("-0.000955252") + std = float("0.651063") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.1875") + max_val = float("3.53125") + mean = float("-0.000889366") + std = float("0.636495") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.125") + max_val = float("4.84375") + mean = float("0.00154598") + std = float("0.351185") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.4375") + max_val = float("0.419922") + mean = float("-9.81584e-05") + std = float("0.0735168") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [512] + dtype = "float32" + min_val = float("0.0354004") + max_val = float("0.298828") + mean = float("0.131269") + std = float("0.0354128") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [512, 512] + dtype = "float32" + min_val = float("-17.0") + max_val = float("14.6875") + 
mean = float("0.00124212") + std = float("0.91165") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.98438") + max_val = float("3.8125") + mean = float("0.000928635") + std = float("0.724313") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.625") + max_val = float("2.48438") + mean = float("0.00132067") + std = float("0.429071") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.328125") + max_val = float("0.337891") + mean = float("-1.0113e-05") + std = float("0.0557238") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [512] + dtype = "float32" + min_val = float("0.113281") + max_val = float("5.25") + mean = float("0.67453") + std = float("0.326679") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [2048, 512] + dtype = "float32" + min_val = float("-10.1875") + max_val = float("7.375") + mean = float("-0.00203279") + std = float("0.335252") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [512, 2048] + dtype = "float32" + min_val = float("-15.75") + max_val = float("14.5625") + mean = float("0.014664") + std = float("0.619637") + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [512] + dtype = "float32" + min_val = float("0.022583") + max_val = float("0.71875") + mean = float("0.0890827") + std = float("0.0493023") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [512, 512] + dtype = "float32" + min_val = float("-12.125") + max_val = float("14.0625") + mean = float("-0.000343404") + std = float("0.716759") + data = None + + +class 
Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.5625") + max_val = float("2.28125") + mean = float("0.000567753") + std = float("0.469188") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.03125") + max_val = float("2.92188") + mean = float("-0.000232129") + std = float("0.392837") + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.34375") + max_val = float("0.306641") + mean = float("-0.000139535") + std = float("0.0599122") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [512] + dtype = "float32" + min_val = float("0.0385742") + max_val = float("0.476562") + mean = float("0.0893378") + std = float("0.0319224") + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [32, 8] + dtype = "float32" + min_val = float("-34.5") + max_val = float("48.0") + mean = float("-1.32047") + std = float("7.05675") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [512, 512] + dtype = "float32" + min_val = float("-12.5625") + max_val = float("13.75") + mean = float("0.000351022") + std = float("0.857384") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.45312") + max_val = float("2.39062") + mean = float("0.000828562") + std = float("0.489659") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.98438") + max_val = float("3.67188") + mean = float("0.00128421") + std = float("0.559587") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [512, 512] + dtype 
= "float32" + min_val = float("-0.511719") + max_val = float("0.53125") + mean = float("-2.00758e-05") + std = float("0.0698939") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [512] + dtype = "float32" + min_val = float("0.0319824") + max_val = float("0.355469") + mean = float("0.241696") + std = float("0.0792719") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [512] + dtype = "float32" + min_val = float("-0.239258") + max_val = float("3.54688") + mean = float("0.612829") + std = float("0.2203") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [2048, 512] + dtype = "float32" + min_val = float("-12.5") + max_val = float("11.1875") + mean = float("0.00199515") + std = float("0.555518") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [512, 2048] + dtype = "float32" + min_val = float("-68.5") + max_val = float("51.75") + mean = float("-0.00369408") + std = float("0.930026") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [512] + dtype = "float32" + min_val = float("0.0234375") + max_val = float("0.15625") + mean = float("0.119119") + std = float("0.0214491") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [512, 512] + dtype = "float32" + min_val = float("-20.875") + max_val = float("22.875") + mean = float("0.00219397") + std = float("1.72905") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [512, 512] + dtype = "float32" + min_val = float("-5.0625") + max_val = float("5.25") + mean = float("0.0022758") + std = float("0.896258") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.4375") + max_val = float("3.79688") + mean = float("-0.00106126") + std 
= float("0.436514") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.482422") + max_val = float("0.554688") + mean = float("-1.9731e-05") + std = float("0.0540952") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [512] + dtype = "float32" + min_val = float("0.246094") + max_val = float("4.75") + mean = float("0.756424") + std = float("0.255841") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [2048, 512] + dtype = "float32" + min_val = float("-13.4375") + max_val = float("16.75") + mean = float("0.00097069") + std = float("0.486232") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [512, 2048] + dtype = "float32" + min_val = float("-27.125") + max_val = float("30.25") + mean = float("-0.00128162") + std = float("0.900071") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [512] + dtype = "float32" + min_val = float("0.0275879") + max_val = float("0.177734") + mean = float("0.117132") + std = float("0.0188376") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [512, 512] + dtype = "float32" + min_val = float("-18.625") + max_val = float("14.6875") + mean = float("0.00229923") + std = float("1.33635") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.1875") + max_val = float("4.0625") + mean = float("0.000259673") + std = float("0.769611") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.375") + max_val = float("4.375") + mean = float("-0.0004771") + std = float("0.450516") + data = None + + +class Program_weight_tensor_parameter_96: + name = 
"parameter_96" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.558594") + max_val = float("0.648438") + mean = float("-0.000177306") + std = float("0.054454") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [512] + dtype = "float32" + min_val = float("0.226562") + max_val = float("4.09375") + mean = float("0.753399") + std = float("0.21234") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [2048, 512] + dtype = "float32" + min_val = float("-14.5625") + max_val = float("14.75") + mean = float("0.00074136") + std = float("0.439012") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [512, 2048] + dtype = "float32" + min_val = float("-15.3125") + max_val = float("18.875") + mean = float("0.00268636") + std = float("0.815878") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [512] + dtype = "float32" + min_val = float("-0.032959") + max_val = float("0.201172") + mean = float("0.11948") + std = float("0.0203831") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [512, 512] + dtype = "float32" + min_val = float("-16.375") + max_val = float("13.8125") + mean = float("0.000326929") + std = float("1.02065") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.45312") + max_val = float("3.75") + mean = float("0.00176018") + std = float("0.680957") + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.92188") + max_val = float("2.78125") + mean = float("-6.96151e-05") + std = float("0.44855") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [512, 512] + dtype = "float32" + min_val = 
float("-0.390625") + max_val = float("0.375") + mean = float("-2.9371e-05") + std = float("0.0547455") + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [512] + dtype = "float32" + min_val = float("0.203125") + max_val = float("2.5") + mean = float("0.613131") + std = float("0.150112") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [2048, 512] + dtype = "float32" + min_val = float("-11.5625") + max_val = float("7.3125") + mean = float("0.000362642") + std = float("0.406834") + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [512, 2048] + dtype = "float32" + min_val = float("-25.0") + max_val = float("34.5") + mean = float("0.00201512") + std = float("0.786157") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [512] + dtype = "float32" + min_val = float("0.0332031") + max_val = float("0.298828") + mean = float("0.136475") + std = float("0.0285545") + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [512, 512] + dtype = "float32" + min_val = float("-9.8125") + max_val = float("11.125") + mean = float("0.00299521") + std = float("0.807871") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [512, 512] + dtype = "float32" + min_val = float("-4.125") + max_val = float("5.1875") + mean = float("-0.000725675") + std = float("0.625774") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.76562") + max_val = float("2.57812") + mean = float("0.000150539") + std = float("0.378036") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.402344") + max_val = float("0.375") + mean = float("5.61201e-05") + std = 
float("0.0449669") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [512] + dtype = "float32" + min_val = float("0.137695") + max_val = float("1.57031") + mean = float("0.405777") + std = float("0.124192") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [2048, 512] + dtype = "float32" + min_val = float("-9.5") + max_val = float("15.0") + mean = float("-2.43034e-05") + std = float("0.360743") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [512, 2048] + dtype = "float32" + min_val = float("-16.75") + max_val = float("19.625") + mean = float("0.000799844") + std = float("0.758569") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [512] + dtype = "float32" + min_val = float("0.0291748") + max_val = float("0.361328") + mean = float("0.102077") + std = float("0.0278308") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [512, 512] + dtype = "float32" + min_val = float("-12.875") + max_val = float("12.875") + mean = float("0.00135863") + std = float("0.811788") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.5") + max_val = float("3.28125") + mean = float("-3.46814e-05") + std = float("0.590862") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.17188") + max_val = float("2.71875") + mean = float("-0.000842045") + std = float("0.46493") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [512, 512] + dtype = "float32" + min_val = float("-0.392578") + max_val = float("0.492188") + mean = float("-3.10072e-05") + std = float("0.0599718") + data = None + + +class 
Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [512] + dtype = "float32" + min_val = float("0.0737305") + max_val = float("3.6875") + mean = float("0.30554") + std = float("0.221261") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [2048, 512] + dtype = "float32" + min_val = float("-8.1875") + max_val = float("8.9375") + mean = float("0.000126899") + std = float("0.305008") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [512, 2048] + dtype = "float32" + min_val = float("-16.625") + max_val = float("14.5") + mean = float("-0.000737352") + std = float("0.615552") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [512] + dtype = "float32" + min_val = float("0.0388184") + max_val = float("0.380859") + mean = float("0.0923548") + std = float("0.0335667") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [32, 8] + dtype = "float32" + min_val = float("-10.8125") + max_val = float("6.125") + mean = float("-0.748865") + std = float("2.72737") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [512, 512] + dtype = "float32" + min_val = float("-9.6875") + max_val = float("12.5625") + mean = float("-0.00112105") + std = float("0.637433") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [512, 512] + dtype = "float32" + min_val = float("-2.875") + max_val = float("2.32812") + mean = float("0.000568047") + std = float("0.449698") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [512, 512] + dtype = "float32" + min_val = float("-3.5") + max_val = float("4.0") + mean = float("-6.72496e-06") + std = float("0.4878") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [512, 512] + dtype 
= "float32" + min_val = float("-0.376953") + max_val = float("0.439453") + mean = float("0.000162044") + std = float("0.0597395") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [32128, 512] + dtype = "float32" + min_val = float("-792.0") + max_val = float("348.0") + mean = float("0.122392") + std = float("23.1937") + data = None