diff --git a/samples/timm/aimv2_1b_patch14_224.apple_pt/graph_hash.txt b/samples/timm/aimv2_1b_patch14_224.apple_pt/graph_hash.txt new file mode 100644 index 000000000..856fd6ca0 --- /dev/null +++ b/samples/timm/aimv2_1b_patch14_224.apple_pt/graph_hash.txt @@ -0,0 +1 @@ +ff10075e46eeae7615c783a3970e04ae79c1ea3f5b6020b167bec1e19489372f \ No newline at end of file diff --git a/samples/timm/aimv2_1b_patch14_224.apple_pt/graph_net.json b/samples/timm/aimv2_1b_patch14_224.apple_pt/graph_net.json new file mode 100644 index 000000000..1373fe3b5 --- /dev/null +++ b/samples/timm/aimv2_1b_patch14_224.apple_pt/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/samples/timm/aimv2_1b_patch14_224.apple_pt/input_meta.py b/samples/timm/aimv2_1b_patch14_224.apple_pt/input_meta.py new file mode 100644 index 000000000..e69de29bb diff --git a/samples/timm/aimv2_1b_patch14_224.apple_pt/input_tensor_constraints.py b/samples/timm/aimv2_1b_patch14_224.apple_pt/input_tensor_constraints.py new file mode 100644 index 000000000..e69de29bb diff --git a/samples/timm/aimv2_1b_patch14_224.apple_pt/model.py b/samples/timm/aimv2_1b_patch14_224.apple_pt/model.py new file mode 100644 index 000000000..3007385cc --- /dev/null +++ b/samples/timm/aimv2_1b_patch14_224.apple_pt/model.py @@ -0,0 +1,2663 @@ +import torch + + +class GraphModule(torch.nn.Module): + def forward( + self, + L_x_: torch.Tensor, + L_self_modules_patch_embed_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_patch_embed_modules_proj_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_patch_embed_modules_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_parameters_pos_embed_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_num_prefix_tokens: torch.SymInt, + ): + l_x_ = L_x_ + l_self_modules_patch_embed_modules_proj_parameters_weight_ = ( + L_self_modules_patch_embed_modules_proj_parameters_weight_ + ) + l_self_modules_patch_embed_modules_proj_parameters_bias_ = ( + L_self_modules_patch_embed_modules_proj_parameters_bias_ + ) + l_self_modules_patch_embed_modules_norm_parameters_weight_ = ( + L_self_modules_patch_embed_modules_norm_parameters_weight_ + ) + l_self_parameters_pos_embed_ = L_self_parameters_pos_embed_ + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ + ) + 
l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ + ) + 
l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ = ( 
+ L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ = ( + 
L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ + 
l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ = 
L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ + 
l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ = ( + 
L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_norm_parameters_weight_ = L_self_modules_norm_parameters_weight_ + l_self_num_prefix_tokens = L_self_num_prefix_tokens + x = torch.conv2d( + l_x_, + l_self_modules_patch_embed_modules_proj_parameters_weight_, + l_self_modules_patch_embed_modules_proj_parameters_bias_, + (14, 14), + (0, 0), + (1, 1), + 1, + ) + l_x_ = ( + l_self_modules_patch_embed_modules_proj_parameters_weight_ + ) = l_self_modules_patch_embed_modules_proj_parameters_bias_ = None + flatten = x.flatten(2) + x = None + x_1 = flatten.transpose(1, 2) + flatten = None + x_2 = torch.rms_norm( + x_1, + (2048,), + l_self_modules_patch_embed_modules_norm_parameters_weight_, + 1e-05, + ) + x_1 = l_self_modules_patch_embed_modules_norm_parameters_weight_ = None + x_3 = x_2 + l_self_parameters_pos_embed_ + x_2 = l_self_parameters_pos_embed_ = None + x_4 = torch.nn.functional.dropout(x_3, 0.0, False, False) + x_3 = None + x_5 = torch.rms_norm( + x_4, + (2048,), + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ = None + linear = torch._C._nn.linear( + x_5, + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_5 = ( + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape = linear.reshape(1, 256, 3, 16, 128) + linear = None + qkv = reshape.permute(2, 0, 3, 1, 4) + reshape = None + unbind = qkv.unbind(0) + qkv = None + q = unbind[0] + k = unbind[1] + v = unbind[2] + unbind = None + x_6 = torch._C._nn.scaled_dot_product_attention( + q, k, v, attn_mask=None, dropout_p=0.0 + ) + q = k = v = None + transpose_1 = x_6.transpose(1, 2) + x_6 = None + x_7 = transpose_1.reshape(1, 256, 2048) + transpose_1 = None + x_8 = torch._C._nn.linear( + x_7, + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_7 = ( + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ + ) = None + x_9 = torch.nn.functional.dropout(x_8, 0.0, False, False) + x_8 = None + x_10 = x_4 + x_9 + x_4 = x_9 = None + x_11 = torch.rms_norm( + x_10, + (2048,), + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ = None + x_gate = torch._C._nn.linear( + x_11, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ = 
( + None + ) + x_12 = torch._C._nn.linear( + x_11, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_11 = ( + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu = torch.nn.functional.silu(x_gate, inplace=False) + x_gate = None + x_13 = silu * x_12 + silu = x_12 = None + x_14 = torch.nn.functional.dropout(x_13, 0.0, False, False) + x_13 = None + x_15 = torch._C._nn.linear( + x_14, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_14 = ( + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_16 = torch.nn.functional.dropout(x_15, 0.0, False, False) + x_15 = None + x_17 = x_10 + x_16 + x_10 = x_16 = None + x_18 = torch.rms_norm( + x_17, + (2048,), + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ = None + linear_5 = torch._C._nn.linear( + x_18, + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_18 = ( + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_2 = linear_5.reshape(1, 256, 3, 16, 128) + linear_5 = None + qkv_1 = reshape_2.permute(2, 0, 3, 1, 4) + reshape_2 = None + unbind_1 = qkv_1.unbind(0) + qkv_1 = None + q_1 = unbind_1[0] + k_1 = unbind_1[1] + v_1 = unbind_1[2] + unbind_1 = None + x_19 = torch._C._nn.scaled_dot_product_attention( + q_1, k_1, v_1, attn_mask=None, dropout_p=0.0 + ) + q_1 = k_1 = v_1 = None + transpose_2 = x_19.transpose(1, 2) + x_19 = None + x_20 = transpose_2.reshape(1, 256, 2048) + transpose_2 = None + x_21 = torch._C._nn.linear( + x_20, + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_20 = ( + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ + ) = None + x_22 = torch.nn.functional.dropout(x_21, 0.0, False, False) + x_21 = None + x_23 = x_17 + x_22 + x_17 = x_22 = None + x_24 = torch.rms_norm( + x_23, + (2048,), + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ = None + x_gate_1 = torch._C._nn.linear( + x_24, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_25 = torch._C._nn.linear( + x_24, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_24 = ( + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_1 = torch.nn.functional.silu(x_gate_1, inplace=False) + x_gate_1 = None + x_26 = silu_1 * x_25 + silu_1 = x_25 = None + x_27 = torch.nn.functional.dropout(x_26, 0.0, False, False) + x_26 = None + x_28 = torch._C._nn.linear( + x_27, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_27 = ( + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_29 = torch.nn.functional.dropout(x_28, 0.0, False, False) + x_28 = None + x_30 = x_23 + x_29 + x_23 = x_29 = None + x_31 = torch.rms_norm( + x_30, + (2048,), + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ = None + linear_10 = torch._C._nn.linear( + x_31, + 
l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_31 = ( + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_4 = linear_10.reshape(1, 256, 3, 16, 128) + linear_10 = None + qkv_2 = reshape_4.permute(2, 0, 3, 1, 4) + reshape_4 = None + unbind_2 = qkv_2.unbind(0) + qkv_2 = None + q_2 = unbind_2[0] + k_2 = unbind_2[1] + v_2 = unbind_2[2] + unbind_2 = None + x_32 = torch._C._nn.scaled_dot_product_attention( + q_2, k_2, v_2, attn_mask=None, dropout_p=0.0 + ) + q_2 = k_2 = v_2 = None + transpose_3 = x_32.transpose(1, 2) + x_32 = None + x_33 = transpose_3.reshape(1, 256, 2048) + transpose_3 = None + x_34 = torch._C._nn.linear( + x_33, + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_33 = ( + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ + ) = None + x_35 = torch.nn.functional.dropout(x_34, 0.0, False, False) + x_34 = None + x_36 = x_30 + x_35 + x_30 = x_35 = None + x_37 = torch.rms_norm( + x_36, + (2048,), + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ = None + x_gate_2 = torch._C._nn.linear( + x_37, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_38 = torch._C._nn.linear( + x_37, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_37 = ( + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_2 = torch.nn.functional.silu(x_gate_2, inplace=False) + x_gate_2 = None + x_39 = silu_2 * x_38 + silu_2 = x_38 = None + x_40 = torch.nn.functional.dropout(x_39, 0.0, False, False) + x_39 = None + x_41 = torch._C._nn.linear( + x_40, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_40 = ( + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_42 = torch.nn.functional.dropout(x_41, 0.0, False, False) + x_41 = None + x_43 = x_36 + x_42 + x_36 = x_42 = None + x_44 = torch.rms_norm( + x_43, + (2048,), + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ = None + linear_15 = torch._C._nn.linear( + x_44, + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_44 = ( + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_6 = linear_15.reshape(1, 256, 3, 16, 128) + linear_15 = None + qkv_3 = reshape_6.permute(2, 0, 3, 1, 4) + reshape_6 = None + unbind_3 = qkv_3.unbind(0) + qkv_3 = None + q_3 = unbind_3[0] + k_3 = unbind_3[1] + v_3 = unbind_3[2] + unbind_3 = None + x_45 = torch._C._nn.scaled_dot_product_attention( + q_3, k_3, v_3, attn_mask=None, dropout_p=0.0 + ) + q_3 = k_3 = v_3 = None + transpose_4 = x_45.transpose(1, 2) + x_45 = None + x_46 = transpose_4.reshape(1, 256, 2048) + transpose_4 = None + x_47 = torch._C._nn.linear( + x_46, + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_46 = ( + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ + ) = None + x_48 = torch.nn.functional.dropout(x_47, 0.0, False, False) + x_47 = None + x_49 = x_43 + x_48 + x_43 = x_48 = None + x_50 = torch.rms_norm( + x_49, + 
(2048,), + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ = None + x_gate_3 = torch._C._nn.linear( + x_50, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_51 = torch._C._nn.linear( + x_50, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_50 = ( + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_3 = torch.nn.functional.silu(x_gate_3, inplace=False) + x_gate_3 = None + x_52 = silu_3 * x_51 + silu_3 = x_51 = None + x_53 = torch.nn.functional.dropout(x_52, 0.0, False, False) + x_52 = None + x_54 = torch._C._nn.linear( + x_53, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_53 = ( + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_55 = torch.nn.functional.dropout(x_54, 0.0, False, False) + x_54 = None + x_56 = x_49 + x_55 + x_49 = x_55 = None + x_57 = torch.rms_norm( + x_56, + (2048,), + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ = None + linear_20 = torch._C._nn.linear( + x_57, + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_57 = ( + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_8 = linear_20.reshape(1, 256, 3, 16, 128) + linear_20 = None + qkv_4 = reshape_8.permute(2, 0, 3, 1, 4) + reshape_8 = None + unbind_4 = qkv_4.unbind(0) + qkv_4 = None + q_4 = unbind_4[0] + k_4 = unbind_4[1] + v_4 = unbind_4[2] + unbind_4 = None + x_58 = torch._C._nn.scaled_dot_product_attention( + q_4, k_4, v_4, attn_mask=None, dropout_p=0.0 + ) + q_4 = k_4 = v_4 = None + transpose_5 = x_58.transpose(1, 2) + x_58 = None + x_59 = transpose_5.reshape(1, 256, 2048) + transpose_5 = None + x_60 = torch._C._nn.linear( + x_59, + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_59 = ( + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ + ) = None + x_61 = torch.nn.functional.dropout(x_60, 0.0, False, False) + x_60 = None + x_62 = x_56 + x_61 + x_56 = x_61 = None + x_63 = torch.rms_norm( + x_62, + (2048,), + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ = None + x_gate_4 = torch._C._nn.linear( + x_63, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_64 = torch._C._nn.linear( + x_63, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_63 = ( + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_4 = torch.nn.functional.silu(x_gate_4, inplace=False) + x_gate_4 = None + x_65 = silu_4 * x_64 + silu_4 = x_64 = None + x_66 = torch.nn.functional.dropout(x_65, 0.0, False, False) + x_65 = None + x_67 = torch._C._nn.linear( + x_66, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_66 = ( + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_68 = 
torch.nn.functional.dropout(x_67, 0.0, False, False) + x_67 = None + x_69 = x_62 + x_68 + x_62 = x_68 = None + x_70 = torch.rms_norm( + x_69, + (2048,), + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ = None + linear_25 = torch._C._nn.linear( + x_70, + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_70 = ( + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_10 = linear_25.reshape(1, 256, 3, 16, 128) + linear_25 = None + qkv_5 = reshape_10.permute(2, 0, 3, 1, 4) + reshape_10 = None + unbind_5 = qkv_5.unbind(0) + qkv_5 = None + q_5 = unbind_5[0] + k_5 = unbind_5[1] + v_5 = unbind_5[2] + unbind_5 = None + x_71 = torch._C._nn.scaled_dot_product_attention( + q_5, k_5, v_5, attn_mask=None, dropout_p=0.0 + ) + q_5 = k_5 = v_5 = None + transpose_6 = x_71.transpose(1, 2) + x_71 = None + x_72 = transpose_6.reshape(1, 256, 2048) + transpose_6 = None + x_73 = torch._C._nn.linear( + x_72, + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_72 = ( + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ + ) = None + x_74 = torch.nn.functional.dropout(x_73, 0.0, False, False) + x_73 = None + x_75 = x_69 + x_74 + x_69 = x_74 = None + x_76 = torch.rms_norm( + x_75, + (2048,), + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ = None + x_gate_5 = torch._C._nn.linear( + x_76, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_77 = torch._C._nn.linear( + x_76, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_76 = ( + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_5 = torch.nn.functional.silu(x_gate_5, inplace=False) + x_gate_5 = None + x_78 = silu_5 * x_77 + silu_5 = x_77 = None + x_79 = torch.nn.functional.dropout(x_78, 0.0, False, False) + x_78 = None + x_80 = torch._C._nn.linear( + x_79, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_79 = ( + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_81 = torch.nn.functional.dropout(x_80, 0.0, False, False) + x_80 = None + x_82 = x_75 + x_81 + x_75 = x_81 = None + x_83 = torch.rms_norm( + x_82, + (2048,), + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ = None + linear_30 = torch._C._nn.linear( + x_83, + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_83 = ( + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_12 = linear_30.reshape(1, 256, 3, 16, 128) + linear_30 = None + qkv_6 = reshape_12.permute(2, 0, 3, 1, 4) + reshape_12 = None + unbind_6 = qkv_6.unbind(0) + qkv_6 = None + q_6 = unbind_6[0] + k_6 = unbind_6[1] + v_6 = unbind_6[2] + unbind_6 = None + x_84 = torch._C._nn.scaled_dot_product_attention( + q_6, k_6, v_6, attn_mask=None, dropout_p=0.0 + ) + q_6 = k_6 = v_6 = None + transpose_7 = x_84.transpose(1, 2) + x_84 = None + x_85 = transpose_7.reshape(1, 256, 2048) + transpose_7 = None + x_86 = 
torch._C._nn.linear( + x_85, + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_85 = ( + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ + ) = None + x_87 = torch.nn.functional.dropout(x_86, 0.0, False, False) + x_86 = None + x_88 = x_82 + x_87 + x_82 = x_87 = None + x_89 = torch.rms_norm( + x_88, + (2048,), + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ = None + x_gate_6 = torch._C._nn.linear( + x_89, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_90 = torch._C._nn.linear( + x_89, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_89 = ( + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_6 = torch.nn.functional.silu(x_gate_6, inplace=False) + x_gate_6 = None + x_91 = silu_6 * x_90 + silu_6 = x_90 = None + x_92 = torch.nn.functional.dropout(x_91, 0.0, False, False) + x_91 = None + x_93 = torch._C._nn.linear( + x_92, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_92 = ( + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_94 = torch.nn.functional.dropout(x_93, 0.0, False, False) + x_93 = None + x_95 = x_88 + x_94 + x_88 = x_94 = None + x_96 = torch.rms_norm( + x_95, + (2048,), + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ = None + linear_35 = torch._C._nn.linear( + x_96, + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_96 = ( + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_14 = linear_35.reshape(1, 256, 3, 16, 128) + linear_35 = None + qkv_7 = reshape_14.permute(2, 0, 3, 1, 4) + reshape_14 = None + unbind_7 = qkv_7.unbind(0) + qkv_7 = None + q_7 = unbind_7[0] + k_7 = unbind_7[1] + v_7 = unbind_7[2] + unbind_7 = None + x_97 = torch._C._nn.scaled_dot_product_attention( + q_7, k_7, v_7, attn_mask=None, dropout_p=0.0 + ) + q_7 = k_7 = v_7 = None + transpose_8 = x_97.transpose(1, 2) + x_97 = None + x_98 = transpose_8.reshape(1, 256, 2048) + transpose_8 = None + x_99 = torch._C._nn.linear( + x_98, + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_98 = ( + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ + ) = None + x_100 = torch.nn.functional.dropout(x_99, 0.0, False, False) + x_99 = None + x_101 = x_95 + x_100 + x_95 = x_100 = None + x_102 = torch.rms_norm( + x_101, + (2048,), + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ = None + x_gate_7 = torch._C._nn.linear( + x_102, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_103 = torch._C._nn.linear( + x_102, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_102 = ( + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_7 = torch.nn.functional.silu(x_gate_7, 
inplace=False) + x_gate_7 = None + x_104 = silu_7 * x_103 + silu_7 = x_103 = None + x_105 = torch.nn.functional.dropout(x_104, 0.0, False, False) + x_104 = None + x_106 = torch._C._nn.linear( + x_105, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_105 = ( + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_107 = torch.nn.functional.dropout(x_106, 0.0, False, False) + x_106 = None + x_108 = x_101 + x_107 + x_101 = x_107 = None + x_109 = torch.rms_norm( + x_108, + (2048,), + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ = None + linear_40 = torch._C._nn.linear( + x_109, + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_109 = ( + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_16 = linear_40.reshape(1, 256, 3, 16, 128) + linear_40 = None + qkv_8 = reshape_16.permute(2, 0, 3, 1, 4) + reshape_16 = None + unbind_8 = qkv_8.unbind(0) + qkv_8 = None + q_8 = unbind_8[0] + k_8 = unbind_8[1] + v_8 = unbind_8[2] + unbind_8 = None + x_110 = torch._C._nn.scaled_dot_product_attention( + q_8, k_8, v_8, attn_mask=None, dropout_p=0.0 + ) + q_8 = k_8 = v_8 = None + transpose_9 = x_110.transpose(1, 2) + x_110 = None + x_111 = transpose_9.reshape(1, 256, 2048) + transpose_9 = None + x_112 = torch._C._nn.linear( + x_111, + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_111 = ( + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ + ) = None + x_113 = torch.nn.functional.dropout(x_112, 0.0, False, False) + x_112 = None + x_114 = x_108 + x_113 + x_108 = x_113 = None + x_115 = torch.rms_norm( + x_114, + (2048,), + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ = None + x_gate_8 = torch._C._nn.linear( + x_115, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_116 = torch._C._nn.linear( + x_115, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_115 = ( + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_8 = torch.nn.functional.silu(x_gate_8, inplace=False) + x_gate_8 = None + x_117 = silu_8 * x_116 + silu_8 = x_116 = None + x_118 = torch.nn.functional.dropout(x_117, 0.0, False, False) + x_117 = None + x_119 = torch._C._nn.linear( + x_118, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_118 = ( + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_120 = torch.nn.functional.dropout(x_119, 0.0, False, False) + x_119 = None + x_121 = x_114 + x_120 + x_114 = x_120 = None + x_122 = torch.rms_norm( + x_121, + (2048,), + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ = None + linear_45 = torch._C._nn.linear( + x_122, + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_122 = ( + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_18 = linear_45.reshape(1, 256, 3, 16, 128) + linear_45 = None 
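+        # NOTE (editor): shape bookkeeping for the fused attention input, read
+        # directly off the literals in this capture. The fused QKV projection
+        # above produces a (1, 256, 6144) tensor (6144 = 3 * 16 * 128, matching
+        # the [6144, 2048] qkv weight); reshape(1, 256, 3, 16, 128) splits it
+        # into (batch, seq, qkv, heads, head_dim), and the permute(2, 0, 3, 1, 4)
+        # below reorders it to (qkv, batch, heads, seq, head_dim) so unbind(0)
+        # yields q, k, v each of shape (1, 16, 256, 128), the layout expected by
+        # scaled_dot_product_attention.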
+ qkv_9 = reshape_18.permute(2, 0, 3, 1, 4) + reshape_18 = None + unbind_9 = qkv_9.unbind(0) + qkv_9 = None + q_9 = unbind_9[0] + k_9 = unbind_9[1] + v_9 = unbind_9[2] + unbind_9 = None + x_123 = torch._C._nn.scaled_dot_product_attention( + q_9, k_9, v_9, attn_mask=None, dropout_p=0.0 + ) + q_9 = k_9 = v_9 = None + transpose_10 = x_123.transpose(1, 2) + x_123 = None + x_124 = transpose_10.reshape(1, 256, 2048) + transpose_10 = None + x_125 = torch._C._nn.linear( + x_124, + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_124 = ( + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ + ) = None + x_126 = torch.nn.functional.dropout(x_125, 0.0, False, False) + x_125 = None + x_127 = x_121 + x_126 + x_121 = x_126 = None + x_128 = torch.rms_norm( + x_127, + (2048,), + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ = None + x_gate_9 = torch._C._nn.linear( + x_128, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_129 = torch._C._nn.linear( + x_128, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_128 = ( + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_9 = torch.nn.functional.silu(x_gate_9, inplace=False) + x_gate_9 = None + x_130 = silu_9 * x_129 + silu_9 = x_129 = None + x_131 = torch.nn.functional.dropout(x_130, 0.0, False, False) + x_130 = None + x_132 = torch._C._nn.linear( + x_131, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_131 = ( + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_133 = torch.nn.functional.dropout(x_132, 0.0, False, False) + x_132 = None + x_134 = x_127 + x_133 + x_127 = x_133 = None + x_135 = torch.rms_norm( + x_134, + (2048,), + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ = None + linear_50 = torch._C._nn.linear( + x_135, + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_135 = ( + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_20 = linear_50.reshape(1, 256, 3, 16, 128) + linear_50 = None + qkv_10 = reshape_20.permute(2, 0, 3, 1, 4) + reshape_20 = None + unbind_10 = qkv_10.unbind(0) + qkv_10 = None + q_10 = unbind_10[0] + k_10 = unbind_10[1] + v_10 = unbind_10[2] + unbind_10 = None + x_136 = torch._C._nn.scaled_dot_product_attention( + q_10, k_10, v_10, attn_mask=None, dropout_p=0.0 + ) + q_10 = k_10 = v_10 = None + transpose_11 = x_136.transpose(1, 2) + x_136 = None + x_137 = transpose_11.reshape(1, 256, 2048) + transpose_11 = None + x_138 = torch._C._nn.linear( + x_137, + l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_137 = l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ = (None) + x_139 = torch.nn.functional.dropout(x_138, 0.0, False, False) + x_138 = None + x_140 = x_134 + x_139 + x_134 = x_139 = None + x_141 = torch.rms_norm( + x_140, + (2048,), + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ = None + 
x_gate_10 = torch._C._nn.linear( + x_141, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_142 = torch._C._nn.linear( + x_141, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_141 = l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_10 = torch.nn.functional.silu(x_gate_10, inplace=False) + x_gate_10 = None + x_143 = silu_10 * x_142 + silu_10 = x_142 = None + x_144 = torch.nn.functional.dropout(x_143, 0.0, False, False) + x_143 = None + x_145 = torch._C._nn.linear( + x_144, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_144 = ( + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_146 = torch.nn.functional.dropout(x_145, 0.0, False, False) + x_145 = None + x_147 = x_140 + x_146 + x_140 = x_146 = None + x_148 = torch.rms_norm( + x_147, + (2048,), + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ = None + linear_55 = torch._C._nn.linear( + x_148, + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_148 = ( + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_22 = linear_55.reshape(1, 256, 3, 16, 128) + linear_55 = None + qkv_11 = reshape_22.permute(2, 0, 3, 1, 4) + reshape_22 = None + unbind_11 = qkv_11.unbind(0) + qkv_11 = None + q_11 = unbind_11[0] + k_11 = unbind_11[1] + v_11 = unbind_11[2] + unbind_11 = None + x_149 = torch._C._nn.scaled_dot_product_attention( + q_11, k_11, v_11, attn_mask=None, dropout_p=0.0 + ) + q_11 = k_11 = v_11 = None + transpose_12 = x_149.transpose(1, 2) + x_149 = None + x_150 = transpose_12.reshape(1, 256, 2048) + transpose_12 = None + x_151 = torch._C._nn.linear( + x_150, + l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_150 = l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ = (None) + x_152 = torch.nn.functional.dropout(x_151, 0.0, False, False) + x_151 = None + x_153 = x_147 + x_152 + x_147 = x_152 = None + x_154 = torch.rms_norm( + x_153, + (2048,), + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ = None + x_gate_11 = torch._C._nn.linear( + x_154, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_155 = torch._C._nn.linear( + x_154, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_154 = l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_11 = torch.nn.functional.silu(x_gate_11, inplace=False) + x_gate_11 = None + x_156 = silu_11 * x_155 + silu_11 = x_155 = None + x_157 = torch.nn.functional.dropout(x_156, 0.0, False, False) + x_156 = None + x_158 = torch._C._nn.linear( + x_157, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_157 = ( + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_159 = torch.nn.functional.dropout(x_158, 0.0, False, False) + x_158 = None + x_160 = 
x_153 + x_159 + x_153 = x_159 = None + x_161 = torch.rms_norm( + x_160, + (2048,), + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ = None + linear_60 = torch._C._nn.linear( + x_161, + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_161 = ( + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_24 = linear_60.reshape(1, 256, 3, 16, 128) + linear_60 = None + qkv_12 = reshape_24.permute(2, 0, 3, 1, 4) + reshape_24 = None + unbind_12 = qkv_12.unbind(0) + qkv_12 = None + q_12 = unbind_12[0] + k_12 = unbind_12[1] + v_12 = unbind_12[2] + unbind_12 = None + x_162 = torch._C._nn.scaled_dot_product_attention( + q_12, k_12, v_12, attn_mask=None, dropout_p=0.0 + ) + q_12 = k_12 = v_12 = None + transpose_13 = x_162.transpose(1, 2) + x_162 = None + x_163 = transpose_13.reshape(1, 256, 2048) + transpose_13 = None + x_164 = torch._C._nn.linear( + x_163, + l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_163 = l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ = (None) + x_165 = torch.nn.functional.dropout(x_164, 0.0, False, False) + x_164 = None + x_166 = x_160 + x_165 + x_160 = x_165 = None + x_167 = torch.rms_norm( + x_166, + (2048,), + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ = None + x_gate_12 = torch._C._nn.linear( + x_167, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_168 = torch._C._nn.linear( + x_167, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_167 = l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_12 = torch.nn.functional.silu(x_gate_12, inplace=False) + x_gate_12 = None + x_169 = silu_12 * x_168 + silu_12 = x_168 = None + x_170 = torch.nn.functional.dropout(x_169, 0.0, False, False) + x_169 = None + x_171 = torch._C._nn.linear( + x_170, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_170 = ( + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_172 = torch.nn.functional.dropout(x_171, 0.0, False, False) + x_171 = None + x_173 = x_166 + x_172 + x_166 = x_172 = None + x_174 = torch.rms_norm( + x_173, + (2048,), + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ = None + linear_65 = torch._C._nn.linear( + x_174, + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_174 = ( + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_26 = linear_65.reshape(1, 256, 3, 16, 128) + linear_65 = None + qkv_13 = reshape_26.permute(2, 0, 3, 1, 4) + reshape_26 = None + unbind_13 = qkv_13.unbind(0) + qkv_13 = None + q_13 = unbind_13[0] + k_13 = unbind_13[1] + v_13 = unbind_13[2] + unbind_13 = None + x_175 = torch._C._nn.scaled_dot_product_attention( + q_13, k_13, v_13, attn_mask=None, dropout_p=0.0 + ) + q_13 = k_13 = v_13 = None + transpose_14 = x_175.transpose(1, 2) + x_175 = None + x_176 = transpose_14.reshape(1, 256, 2048) + transpose_14 = 
None + x_177 = torch._C._nn.linear( + x_176, + l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_176 = l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ = (None) + x_178 = torch.nn.functional.dropout(x_177, 0.0, False, False) + x_177 = None + x_179 = x_173 + x_178 + x_173 = x_178 = None + x_180 = torch.rms_norm( + x_179, + (2048,), + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ = None + x_gate_13 = torch._C._nn.linear( + x_180, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_181 = torch._C._nn.linear( + x_180, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_180 = l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_13 = torch.nn.functional.silu(x_gate_13, inplace=False) + x_gate_13 = None + x_182 = silu_13 * x_181 + silu_13 = x_181 = None + x_183 = torch.nn.functional.dropout(x_182, 0.0, False, False) + x_182 = None + x_184 = torch._C._nn.linear( + x_183, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_183 = ( + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_185 = torch.nn.functional.dropout(x_184, 0.0, False, False) + x_184 = None + x_186 = x_179 + x_185 + x_179 = x_185 = None + x_187 = torch.rms_norm( + x_186, + (2048,), + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ = None + linear_70 = torch._C._nn.linear( + x_187, + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_187 = ( + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_28 = linear_70.reshape(1, 256, 3, 16, 128) + linear_70 = None + qkv_14 = reshape_28.permute(2, 0, 3, 1, 4) + reshape_28 = None + unbind_14 = qkv_14.unbind(0) + qkv_14 = None + q_14 = unbind_14[0] + k_14 = unbind_14[1] + v_14 = unbind_14[2] + unbind_14 = None + x_188 = torch._C._nn.scaled_dot_product_attention( + q_14, k_14, v_14, attn_mask=None, dropout_p=0.0 + ) + q_14 = k_14 = v_14 = None + transpose_15 = x_188.transpose(1, 2) + x_188 = None + x_189 = transpose_15.reshape(1, 256, 2048) + transpose_15 = None + x_190 = torch._C._nn.linear( + x_189, + l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_189 = l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ = (None) + x_191 = torch.nn.functional.dropout(x_190, 0.0, False, False) + x_190 = None + x_192 = x_186 + x_191 + x_186 = x_191 = None + x_193 = torch.rms_norm( + x_192, + (2048,), + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ = None + x_gate_14 = torch._C._nn.linear( + x_193, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_194 = torch._C._nn.linear( + x_193, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_193 = 
l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_14 = torch.nn.functional.silu(x_gate_14, inplace=False) + x_gate_14 = None + x_195 = silu_14 * x_194 + silu_14 = x_194 = None + x_196 = torch.nn.functional.dropout(x_195, 0.0, False, False) + x_195 = None + x_197 = torch._C._nn.linear( + x_196, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_196 = ( + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_198 = torch.nn.functional.dropout(x_197, 0.0, False, False) + x_197 = None + x_199 = x_192 + x_198 + x_192 = x_198 = None + x_200 = torch.rms_norm( + x_199, + (2048,), + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ = None + linear_75 = torch._C._nn.linear( + x_200, + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_200 = ( + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_30 = linear_75.reshape(1, 256, 3, 16, 128) + linear_75 = None + qkv_15 = reshape_30.permute(2, 0, 3, 1, 4) + reshape_30 = None + unbind_15 = qkv_15.unbind(0) + qkv_15 = None + q_15 = unbind_15[0] + k_15 = unbind_15[1] + v_15 = unbind_15[2] + unbind_15 = None + x_201 = torch._C._nn.scaled_dot_product_attention( + q_15, k_15, v_15, attn_mask=None, dropout_p=0.0 + ) + q_15 = k_15 = v_15 = None + transpose_16 = x_201.transpose(1, 2) + x_201 = None + x_202 = transpose_16.reshape(1, 256, 2048) + transpose_16 = None + x_203 = torch._C._nn.linear( + x_202, + l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_202 = l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ = (None) + x_204 = torch.nn.functional.dropout(x_203, 0.0, False, False) + x_203 = None + x_205 = x_199 + x_204 + x_199 = x_204 = None + x_206 = torch.rms_norm( + x_205, + (2048,), + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ = None + x_gate_15 = torch._C._nn.linear( + x_206, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_207 = torch._C._nn.linear( + x_206, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_206 = l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_15 = torch.nn.functional.silu(x_gate_15, inplace=False) + x_gate_15 = None + x_208 = silu_15 * x_207 + silu_15 = x_207 = None + x_209 = torch.nn.functional.dropout(x_208, 0.0, False, False) + x_208 = None + x_210 = torch._C._nn.linear( + x_209, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_209 = ( + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_211 = torch.nn.functional.dropout(x_210, 0.0, False, False) + x_210 = None + x_212 = x_205 + x_211 + x_205 = x_211 = None + x_213 = torch.rms_norm( + x_212, + (2048,), + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ = None + linear_80 = torch._C._nn.linear( + x_213, + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_, + None, + ) + 
x_213 = ( + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_32 = linear_80.reshape(1, 256, 3, 16, 128) + linear_80 = None + qkv_16 = reshape_32.permute(2, 0, 3, 1, 4) + reshape_32 = None + unbind_16 = qkv_16.unbind(0) + qkv_16 = None + q_16 = unbind_16[0] + k_16 = unbind_16[1] + v_16 = unbind_16[2] + unbind_16 = None + x_214 = torch._C._nn.scaled_dot_product_attention( + q_16, k_16, v_16, attn_mask=None, dropout_p=0.0 + ) + q_16 = k_16 = v_16 = None + transpose_17 = x_214.transpose(1, 2) + x_214 = None + x_215 = transpose_17.reshape(1, 256, 2048) + transpose_17 = None + x_216 = torch._C._nn.linear( + x_215, + l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_215 = l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ = (None) + x_217 = torch.nn.functional.dropout(x_216, 0.0, False, False) + x_216 = None + x_218 = x_212 + x_217 + x_212 = x_217 = None + x_219 = torch.rms_norm( + x_218, + (2048,), + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ = None + x_gate_16 = torch._C._nn.linear( + x_219, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_220 = torch._C._nn.linear( + x_219, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_219 = l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_16 = torch.nn.functional.silu(x_gate_16, inplace=False) + x_gate_16 = None + x_221 = silu_16 * x_220 + silu_16 = x_220 = None + x_222 = torch.nn.functional.dropout(x_221, 0.0, False, False) + x_221 = None + x_223 = torch._C._nn.linear( + x_222, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_222 = ( + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_224 = torch.nn.functional.dropout(x_223, 0.0, False, False) + x_223 = None + x_225 = x_218 + x_224 + x_218 = x_224 = None + x_226 = torch.rms_norm( + x_225, + (2048,), + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ = None + linear_85 = torch._C._nn.linear( + x_226, + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_226 = ( + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_34 = linear_85.reshape(1, 256, 3, 16, 128) + linear_85 = None + qkv_17 = reshape_34.permute(2, 0, 3, 1, 4) + reshape_34 = None + unbind_17 = qkv_17.unbind(0) + qkv_17 = None + q_17 = unbind_17[0] + k_17 = unbind_17[1] + v_17 = unbind_17[2] + unbind_17 = None + x_227 = torch._C._nn.scaled_dot_product_attention( + q_17, k_17, v_17, attn_mask=None, dropout_p=0.0 + ) + q_17 = k_17 = v_17 = None + transpose_18 = x_227.transpose(1, 2) + x_227 = None + x_228 = transpose_18.reshape(1, 256, 2048) + transpose_18 = None + x_229 = torch._C._nn.linear( + x_228, + l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_228 = l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ = (None) + x_230 = torch.nn.functional.dropout(x_229, 0.0, False, False) + x_229 = None + x_231 = x_225 + x_230 + x_225 = x_230 = None + x_232 = 
torch.rms_norm( + x_231, + (2048,), + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ = None + x_gate_17 = torch._C._nn.linear( + x_232, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_233 = torch._C._nn.linear( + x_232, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_232 = l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_17 = torch.nn.functional.silu(x_gate_17, inplace=False) + x_gate_17 = None + x_234 = silu_17 * x_233 + silu_17 = x_233 = None + x_235 = torch.nn.functional.dropout(x_234, 0.0, False, False) + x_234 = None + x_236 = torch._C._nn.linear( + x_235, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_235 = ( + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_237 = torch.nn.functional.dropout(x_236, 0.0, False, False) + x_236 = None + x_238 = x_231 + x_237 + x_231 = x_237 = None + x_239 = torch.rms_norm( + x_238, + (2048,), + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ = None + linear_90 = torch._C._nn.linear( + x_239, + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_239 = ( + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_36 = linear_90.reshape(1, 256, 3, 16, 128) + linear_90 = None + qkv_18 = reshape_36.permute(2, 0, 3, 1, 4) + reshape_36 = None + unbind_18 = qkv_18.unbind(0) + qkv_18 = None + q_18 = unbind_18[0] + k_18 = unbind_18[1] + v_18 = unbind_18[2] + unbind_18 = None + x_240 = torch._C._nn.scaled_dot_product_attention( + q_18, k_18, v_18, attn_mask=None, dropout_p=0.0 + ) + q_18 = k_18 = v_18 = None + transpose_19 = x_240.transpose(1, 2) + x_240 = None + x_241 = transpose_19.reshape(1, 256, 2048) + transpose_19 = None + x_242 = torch._C._nn.linear( + x_241, + l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_241 = l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ = (None) + x_243 = torch.nn.functional.dropout(x_242, 0.0, False, False) + x_242 = None + x_244 = x_238 + x_243 + x_238 = x_243 = None + x_245 = torch.rms_norm( + x_244, + (2048,), + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ = None + x_gate_18 = torch._C._nn.linear( + x_245, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_246 = torch._C._nn.linear( + x_245, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_245 = l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_18 = torch.nn.functional.silu(x_gate_18, inplace=False) + x_gate_18 = None + x_247 = silu_18 * x_246 + silu_18 = x_246 = None + x_248 = torch.nn.functional.dropout(x_247, 0.0, False, False) + x_247 = None + x_249 = torch._C._nn.linear( + x_248, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_, + None, + ) 
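+        # NOTE (editor): all 24 blocks (indices 0-23) repeat the same pre-norm
+        # pattern seen here: x = x + proj(SDPA(qkv(rms_norm(x)))) for attention,
+        # then x = x + fc2(silu(fc1_g(rms_norm(x))) * fc1_x(rms_norm(x))), i.e. a
+        # SwiGLU MLP. The dropout calls are no-ops in this capture (p=0.0,
+        # training=False), and each local is rebound to None after its last use,
+        # mirroring how the traced graph frees intermediates eagerly.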
+ x_248 = ( + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_250 = torch.nn.functional.dropout(x_249, 0.0, False, False) + x_249 = None + x_251 = x_244 + x_250 + x_244 = x_250 = None + x_252 = torch.rms_norm( + x_251, + (2048,), + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ = None + linear_95 = torch._C._nn.linear( + x_252, + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_252 = ( + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_38 = linear_95.reshape(1, 256, 3, 16, 128) + linear_95 = None + qkv_19 = reshape_38.permute(2, 0, 3, 1, 4) + reshape_38 = None + unbind_19 = qkv_19.unbind(0) + qkv_19 = None + q_19 = unbind_19[0] + k_19 = unbind_19[1] + v_19 = unbind_19[2] + unbind_19 = None + x_253 = torch._C._nn.scaled_dot_product_attention( + q_19, k_19, v_19, attn_mask=None, dropout_p=0.0 + ) + q_19 = k_19 = v_19 = None + transpose_20 = x_253.transpose(1, 2) + x_253 = None + x_254 = transpose_20.reshape(1, 256, 2048) + transpose_20 = None + x_255 = torch._C._nn.linear( + x_254, + l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_254 = l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ = (None) + x_256 = torch.nn.functional.dropout(x_255, 0.0, False, False) + x_255 = None + x_257 = x_251 + x_256 + x_251 = x_256 = None + x_258 = torch.rms_norm( + x_257, + (2048,), + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ = None + x_gate_19 = torch._C._nn.linear( + x_258, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_259 = torch._C._nn.linear( + x_258, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_258 = l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_19 = torch.nn.functional.silu(x_gate_19, inplace=False) + x_gate_19 = None + x_260 = silu_19 * x_259 + silu_19 = x_259 = None + x_261 = torch.nn.functional.dropout(x_260, 0.0, False, False) + x_260 = None + x_262 = torch._C._nn.linear( + x_261, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_261 = ( + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_263 = torch.nn.functional.dropout(x_262, 0.0, False, False) + x_262 = None + x_264 = x_257 + x_263 + x_257 = x_263 = None + x_265 = torch.rms_norm( + x_264, + (2048,), + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ = None + linear_100 = torch._C._nn.linear( + x_265, + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_265 = ( + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_40 = linear_100.reshape(1, 256, 3, 16, 128) + linear_100 = None + qkv_20 = reshape_40.permute(2, 0, 3, 1, 4) + reshape_40 = None + unbind_20 = qkv_20.unbind(0) + qkv_20 = None + q_20 = unbind_20[0] + k_20 = unbind_20[1] + v_20 = unbind_20[2] + unbind_20 = None + x_266 = torch._C._nn.scaled_dot_product_attention( + 
q_20, k_20, v_20, attn_mask=None, dropout_p=0.0 + ) + q_20 = k_20 = v_20 = None + transpose_21 = x_266.transpose(1, 2) + x_266 = None + x_267 = transpose_21.reshape(1, 256, 2048) + transpose_21 = None + x_268 = torch._C._nn.linear( + x_267, + l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_267 = l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ = (None) + x_269 = torch.nn.functional.dropout(x_268, 0.0, False, False) + x_268 = None + x_270 = x_264 + x_269 + x_264 = x_269 = None + x_271 = torch.rms_norm( + x_270, + (2048,), + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ = None + x_gate_20 = torch._C._nn.linear( + x_271, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_272 = torch._C._nn.linear( + x_271, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_271 = l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_20 = torch.nn.functional.silu(x_gate_20, inplace=False) + x_gate_20 = None + x_273 = silu_20 * x_272 + silu_20 = x_272 = None + x_274 = torch.nn.functional.dropout(x_273, 0.0, False, False) + x_273 = None + x_275 = torch._C._nn.linear( + x_274, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_274 = ( + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_276 = torch.nn.functional.dropout(x_275, 0.0, False, False) + x_275 = None + x_277 = x_270 + x_276 + x_270 = x_276 = None + x_278 = torch.rms_norm( + x_277, + (2048,), + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ = None + linear_105 = torch._C._nn.linear( + x_278, + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_278 = ( + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_42 = linear_105.reshape(1, 256, 3, 16, 128) + linear_105 = None + qkv_21 = reshape_42.permute(2, 0, 3, 1, 4) + reshape_42 = None + unbind_21 = qkv_21.unbind(0) + qkv_21 = None + q_21 = unbind_21[0] + k_21 = unbind_21[1] + v_21 = unbind_21[2] + unbind_21 = None + x_279 = torch._C._nn.scaled_dot_product_attention( + q_21, k_21, v_21, attn_mask=None, dropout_p=0.0 + ) + q_21 = k_21 = v_21 = None + transpose_22 = x_279.transpose(1, 2) + x_279 = None + x_280 = transpose_22.reshape(1, 256, 2048) + transpose_22 = None + x_281 = torch._C._nn.linear( + x_280, + l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_280 = l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ = (None) + x_282 = torch.nn.functional.dropout(x_281, 0.0, False, False) + x_281 = None + x_283 = x_277 + x_282 + x_277 = x_282 = None + x_284 = torch.rms_norm( + x_283, + (2048,), + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ = None + x_gate_21 = torch._C._nn.linear( + x_284, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + 
x_285 = torch._C._nn.linear( + x_284, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_284 = l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_21 = torch.nn.functional.silu(x_gate_21, inplace=False) + x_gate_21 = None + x_286 = silu_21 * x_285 + silu_21 = x_285 = None + x_287 = torch.nn.functional.dropout(x_286, 0.0, False, False) + x_286 = None + x_288 = torch._C._nn.linear( + x_287, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_287 = ( + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_289 = torch.nn.functional.dropout(x_288, 0.0, False, False) + x_288 = None + x_290 = x_283 + x_289 + x_283 = x_289 = None + x_291 = torch.rms_norm( + x_290, + (2048,), + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ = None + linear_110 = torch._C._nn.linear( + x_291, + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_291 = ( + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_44 = linear_110.reshape(1, 256, 3, 16, 128) + linear_110 = None + qkv_22 = reshape_44.permute(2, 0, 3, 1, 4) + reshape_44 = None + unbind_22 = qkv_22.unbind(0) + qkv_22 = None + q_22 = unbind_22[0] + k_22 = unbind_22[1] + v_22 = unbind_22[2] + unbind_22 = None + x_292 = torch._C._nn.scaled_dot_product_attention( + q_22, k_22, v_22, attn_mask=None, dropout_p=0.0 + ) + q_22 = k_22 = v_22 = None + transpose_23 = x_292.transpose(1, 2) + x_292 = None + x_293 = transpose_23.reshape(1, 256, 2048) + transpose_23 = None + x_294 = torch._C._nn.linear( + x_293, + l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_293 = l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ = (None) + x_295 = torch.nn.functional.dropout(x_294, 0.0, False, False) + x_294 = None + x_296 = x_290 + x_295 + x_290 = x_295 = None + x_297 = torch.rms_norm( + x_296, + (2048,), + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ = None + x_gate_22 = torch._C._nn.linear( + x_297, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_298 = torch._C._nn.linear( + x_297, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_297 = l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_22 = torch.nn.functional.silu(x_gate_22, inplace=False) + x_gate_22 = None + x_299 = silu_22 * x_298 + silu_22 = x_298 = None + x_300 = torch.nn.functional.dropout(x_299, 0.0, False, False) + x_299 = None + x_301 = torch._C._nn.linear( + x_300, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_300 = ( + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_302 = torch.nn.functional.dropout(x_301, 0.0, False, False) + x_301 = None + x_303 = x_296 + x_302 + x_296 = x_302 = None + x_304 = torch.rms_norm( + x_303, + (2048,), + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ 
= None + linear_115 = torch._C._nn.linear( + x_304, + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_304 = ( + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_46 = linear_115.reshape(1, 256, 3, 16, 128) + linear_115 = None + qkv_23 = reshape_46.permute(2, 0, 3, 1, 4) + reshape_46 = None + unbind_23 = qkv_23.unbind(0) + qkv_23 = None + q_23 = unbind_23[0] + k_23 = unbind_23[1] + v_23 = unbind_23[2] + unbind_23 = None + x_305 = torch._C._nn.scaled_dot_product_attention( + q_23, k_23, v_23, attn_mask=None, dropout_p=0.0 + ) + q_23 = k_23 = v_23 = None + transpose_24 = x_305.transpose(1, 2) + x_305 = None + x_306 = transpose_24.reshape(1, 256, 2048) + transpose_24 = None + x_307 = torch._C._nn.linear( + x_306, + l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_306 = l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ = (None) + x_308 = torch.nn.functional.dropout(x_307, 0.0, False, False) + x_307 = None + x_309 = x_303 + x_308 + x_303 = x_308 = None + x_310 = torch.rms_norm( + x_309, + (2048,), + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ = None + x_gate_23 = torch._C._nn.linear( + x_310, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_311 = torch._C._nn.linear( + x_310, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_310 = l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_23 = torch.nn.functional.silu(x_gate_23, inplace=False) + x_gate_23 = None + x_312 = silu_23 * x_311 + silu_23 = x_311 = None + x_313 = torch.nn.functional.dropout(x_312, 0.0, False, False) + x_312 = None + x_314 = torch._C._nn.linear( + x_313, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_313 = ( + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_315 = torch.nn.functional.dropout(x_314, 0.0, False, False) + x_314 = None + x_316 = x_309 + x_315 + x_309 = x_315 = None + x_317 = torch.rms_norm( + x_316, (2048,), l_self_modules_norm_parameters_weight_, 1e-05 + ) + x_316 = l_self_modules_norm_parameters_weight_ = None + x_318 = x_317[ + (slice(None, None, None), slice(l_self_num_prefix_tokens, None, None)) + ] + x_317 = l_self_num_prefix_tokens = None + x_319 = x_318.mean(dim=1) + x_318 = None + x_320 = torch.nn.functional.dropout(x_319, 0.0, False, False) + x_319 = None + return (x_320,) diff --git a/samples/timm/aimv2_1b_patch14_224.apple_pt/weight_meta.py b/samples/timm/aimv2_1b_patch14_224.apple_pt/weight_meta.py new file mode 100644 index 000000000..41ff35ab7 --- /dev/null +++ b/samples/timm/aimv2_1b_patch14_224.apple_pt/weight_meta.py @@ -0,0 +1,1920 @@ +class Program_weight_tensor_meta_L_x_: + name = "L_x_" + shape = [1, 3, 224, 224] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.232 + std = 1.287 + data = None + + +class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_weight_: + name = "L_self_modules_patch_embed_modules_proj_parameters_weight_" + shape = [2048, 3, 14, 14] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.024 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_bias_: + name = "L_self_modules_patch_embed_modules_proj_parameters_bias_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.023 + data = None + + +class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_norm_parameters_weight_: + name = "L_self_modules_patch_embed_modules_norm_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_parameters_pos_embed_: + name = "L_self_parameters_pos_embed_" + shape = [1, 256, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_: + name = 
"L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 
0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_" + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = 
"cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 
2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = 
None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_: + name = 
"L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = 
"torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [6144, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_" + ) + shape = [2048, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [5632, 2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_" + shape = [2048, 5632] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_norm_parameters_weight_: + name = "L_self_modules_norm_parameters_weight_" + shape = [2048] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_num_prefix_tokens: + name = "L_self_num_prefix_tokens" + shape = [] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + data = [4] diff --git a/samples/timm/aimv2_3b_patch14_224.apple_pt/graph_hash.txt b/samples/timm/aimv2_3b_patch14_224.apple_pt/graph_hash.txt new file mode 100644 index 000000000..6eb601bbf --- /dev/null +++ b/samples/timm/aimv2_3b_patch14_224.apple_pt/graph_hash.txt @@ -0,0 +1 @@ +60bb6d0dc13811c3e1ded64bb218dc7c0d655cf6f734a78d49d2f9bb22641c34 \ No newline at end of file diff --git a/samples/timm/aimv2_3b_patch14_224.apple_pt/graph_net.json b/samples/timm/aimv2_3b_patch14_224.apple_pt/graph_net.json new file mode 100644 index 000000000..1373fe3b5 --- /dev/null +++ b/samples/timm/aimv2_3b_patch14_224.apple_pt/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/samples/timm/aimv2_3b_patch14_224.apple_pt/input_meta.py b/samples/timm/aimv2_3b_patch14_224.apple_pt/input_meta.py new file mode 100644 index 000000000..e69de29bb diff --git a/samples/timm/aimv2_3b_patch14_224.apple_pt/input_tensor_constraints.py b/samples/timm/aimv2_3b_patch14_224.apple_pt/input_tensor_constraints.py new file mode 100644 index 000000000..e69de29bb diff --git a/samples/timm/aimv2_3b_patch14_224.apple_pt/model.py b/samples/timm/aimv2_3b_patch14_224.apple_pt/model.py new file mode 100644 index 000000000..6a8706999 --- /dev/null +++ b/samples/timm/aimv2_3b_patch14_224.apple_pt/model.py @@ -0,0 +1,2663 @@ +import torch + + +class GraphModule(torch.nn.Module): + def forward( + self, + L_x_: torch.Tensor, + L_self_modules_patch_embed_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_patch_embed_modules_proj_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_patch_embed_modules_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_parameters_pos_embed_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_num_prefix_tokens: torch.SymInt, + ): + l_x_ = L_x_ + l_self_modules_patch_embed_modules_proj_parameters_weight_ = ( + L_self_modules_patch_embed_modules_proj_parameters_weight_ + ) + l_self_modules_patch_embed_modules_proj_parameters_bias_ = ( + L_self_modules_patch_embed_modules_proj_parameters_bias_ + ) + l_self_modules_patch_embed_modules_norm_parameters_weight_ = ( + L_self_modules_patch_embed_modules_norm_parameters_weight_ + ) + l_self_parameters_pos_embed_ = L_self_parameters_pos_embed_ + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ = ( + 
L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ + 
) + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ + ) + 
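# Prologue typical of a torch.compile (Dynamo) capture: each L_*-named input is rebound to a lowercase local before use, and the same rebinding pattern repeats verbatim for the remaining blocks. +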
l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ = ( + 
L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ + 
l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ = ( + 
L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ + ) + 
l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ = ( + 
L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_norm_parameters_weight_ = L_self_modules_norm_parameters_weight_ + l_self_num_prefix_tokens = L_self_num_prefix_tokens + x = torch.conv2d( + l_x_, + l_self_modules_patch_embed_modules_proj_parameters_weight_, + l_self_modules_patch_embed_modules_proj_parameters_bias_, + (14, 14), + (0, 0), + (1, 1), + 1, + ) + l_x_ = ( + l_self_modules_patch_embed_modules_proj_parameters_weight_ + ) = l_self_modules_patch_embed_modules_proj_parameters_bias_ = None + flatten = x.flatten(2) + x = None + x_1 = flatten.transpose(1, 2) + flatten = None + x_2 = torch.rms_norm( + x_1, + (3072,), + l_self_modules_patch_embed_modules_norm_parameters_weight_, + 1e-05, + ) + x_1 = l_self_modules_patch_embed_modules_norm_parameters_weight_ = None + x_3 = x_2 + l_self_parameters_pos_embed_ + x_2 = l_self_parameters_pos_embed_ = None + x_4 = torch.nn.functional.dropout(x_3, 0.0, False, False) + x_3 = None + x_5 = torch.rms_norm( + x_4, + (3072,), + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ = None + linear = torch._C._nn.linear( + x_5, + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_5 = ( + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape = linear.reshape(1, 256, 3, 24, 128) + linear = None + qkv = reshape.permute(2, 0, 3, 1, 4) + reshape = None + unbind = qkv.unbind(0) + qkv = None + q = unbind[0] + k = unbind[1] + v = unbind[2] + unbind = None + x_6 = torch._C._nn.scaled_dot_product_attention( + q, k, v, attn_mask=None, dropout_p=0.0 + ) + q = k = v = None + transpose_1 = x_6.transpose(1, 2) + x_6 = None + x_7 = transpose_1.reshape(1, 256, 3072) + transpose_1 = None + x_8 = torch._C._nn.linear( + x_7, + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_, + None, 
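+ # (above: the stride-14, 14x14 patch-embed conv projects the image to 3072 channels, which are flattened and transposed into 256 tokens, RMS-normed, and offset by the learned pos_embed; block 0 then runs pre-norm attention: a fused bias-free QKV linear reshaped to (1, 256, 3, 24, 128), unbound into q/k/v, scaled_dot_product_attention over 24 heads of dim 128, and this bias-free output projection)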
+ ) + x_7 = ( + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ + ) = None + x_9 = torch.nn.functional.dropout(x_8, 0.0, False, False) + x_8 = None + x_10 = x_4 + x_9 + x_4 = x_9 = None + x_11 = torch.rms_norm( + x_10, + (3072,), + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ = None + x_gate = torch._C._nn.linear( + x_11, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_12 = torch._C._nn.linear( + x_11, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_11 = ( + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu = torch.nn.functional.silu(x_gate, inplace=False) + x_gate = None + x_13 = silu * x_12 + silu = x_12 = None + x_14 = torch.nn.functional.dropout(x_13, 0.0, False, False) + x_13 = None + x_15 = torch._C._nn.linear( + x_14, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_14 = ( + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_16 = torch.nn.functional.dropout(x_15, 0.0, False, False) + x_15 = None + x_17 = x_10 + x_16 + x_10 = x_16 = None + x_18 = torch.rms_norm( + x_17, + (3072,), + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ = None + linear_5 = torch._C._nn.linear( + x_18, + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_18 = ( + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_2 = linear_5.reshape(1, 256, 3, 24, 128) + linear_5 = None + qkv_1 = reshape_2.permute(2, 0, 3, 1, 4) + reshape_2 = None + unbind_1 = qkv_1.unbind(0) + qkv_1 = None + q_1 = unbind_1[0] + k_1 = unbind_1[1] + v_1 = unbind_1[2] + unbind_1 = None + x_19 = torch._C._nn.scaled_dot_product_attention( + q_1, k_1, v_1, attn_mask=None, dropout_p=0.0 + ) + q_1 = k_1 = v_1 = None + transpose_2 = x_19.transpose(1, 2) + x_19 = None + x_20 = transpose_2.reshape(1, 256, 3072) + transpose_2 = None + x_21 = torch._C._nn.linear( + x_20, + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_20 = ( + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ + ) = None + x_22 = torch.nn.functional.dropout(x_21, 0.0, False, False) + x_21 = None + x_23 = x_17 + x_22 + x_17 = x_22 = None + x_24 = torch.rms_norm( + x_23, + (3072,), + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ = None + x_gate_1 = torch._C._nn.linear( + x_24, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_25 = torch._C._nn.linear( + x_24, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_24 = ( + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_1 = torch.nn.functional.silu(x_gate_1, inplace=False) + x_gate_1 = None + x_26 = silu_1 * x_25 + silu_1 = x_25 = None + x_27 = torch.nn.functional.dropout(x_26, 0.0, False, False) + x_26 = None + x_28 = 
torch._C._nn.linear( + x_27, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_27 = ( + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_29 = torch.nn.functional.dropout(x_28, 0.0, False, False) + x_28 = None + x_30 = x_23 + x_29 + x_23 = x_29 = None + x_31 = torch.rms_norm( + x_30, + (3072,), + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ = None + linear_10 = torch._C._nn.linear( + x_31, + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_31 = ( + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_4 = linear_10.reshape(1, 256, 3, 24, 128) + linear_10 = None + qkv_2 = reshape_4.permute(2, 0, 3, 1, 4) + reshape_4 = None + unbind_2 = qkv_2.unbind(0) + qkv_2 = None + q_2 = unbind_2[0] + k_2 = unbind_2[1] + v_2 = unbind_2[2] + unbind_2 = None + x_32 = torch._C._nn.scaled_dot_product_attention( + q_2, k_2, v_2, attn_mask=None, dropout_p=0.0 + ) + q_2 = k_2 = v_2 = None + transpose_3 = x_32.transpose(1, 2) + x_32 = None + x_33 = transpose_3.reshape(1, 256, 3072) + transpose_3 = None + x_34 = torch._C._nn.linear( + x_33, + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_33 = ( + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ + ) = None + x_35 = torch.nn.functional.dropout(x_34, 0.0, False, False) + x_34 = None + x_36 = x_30 + x_35 + x_30 = x_35 = None + x_37 = torch.rms_norm( + x_36, + (3072,), + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ = None + x_gate_2 = torch._C._nn.linear( + x_37, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_38 = torch._C._nn.linear( + x_37, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_37 = ( + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_2 = torch.nn.functional.silu(x_gate_2, inplace=False) + x_gate_2 = None + x_39 = silu_2 * x_38 + silu_2 = x_38 = None + x_40 = torch.nn.functional.dropout(x_39, 0.0, False, False) + x_39 = None + x_41 = torch._C._nn.linear( + x_40, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_40 = ( + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_42 = torch.nn.functional.dropout(x_41, 0.0, False, False) + x_41 = None + x_43 = x_36 + x_42 + x_36 = x_42 = None + x_44 = torch.rms_norm( + x_43, + (3072,), + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ = None + linear_15 = torch._C._nn.linear( + x_44, + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_44 = ( + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_6 = linear_15.reshape(1, 256, 3, 24, 128) + linear_15 = None + qkv_3 = reshape_6.permute(2, 0, 3, 1, 4) + reshape_6 = None + unbind_3 = qkv_3.unbind(0) + qkv_3 = None + q_3 = unbind_3[0] + k_3 = unbind_3[1] + v_3 = unbind_3[2] + unbind_3 = None + x_45 = 
torch._C._nn.scaled_dot_product_attention( + q_3, k_3, v_3, attn_mask=None, dropout_p=0.0 + ) + q_3 = k_3 = v_3 = None + transpose_4 = x_45.transpose(1, 2) + x_45 = None + x_46 = transpose_4.reshape(1, 256, 3072) + transpose_4 = None + x_47 = torch._C._nn.linear( + x_46, + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_46 = ( + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ + ) = None + x_48 = torch.nn.functional.dropout(x_47, 0.0, False, False) + x_47 = None + x_49 = x_43 + x_48 + x_43 = x_48 = None + x_50 = torch.rms_norm( + x_49, + (3072,), + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ = None + x_gate_3 = torch._C._nn.linear( + x_50, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_51 = torch._C._nn.linear( + x_50, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_50 = ( + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_3 = torch.nn.functional.silu(x_gate_3, inplace=False) + x_gate_3 = None + x_52 = silu_3 * x_51 + silu_3 = x_51 = None + x_53 = torch.nn.functional.dropout(x_52, 0.0, False, False) + x_52 = None + x_54 = torch._C._nn.linear( + x_53, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_53 = ( + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_55 = torch.nn.functional.dropout(x_54, 0.0, False, False) + x_54 = None + x_56 = x_49 + x_55 + x_49 = x_55 = None + x_57 = torch.rms_norm( + x_56, + (3072,), + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ = None + linear_20 = torch._C._nn.linear( + x_57, + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_57 = ( + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_8 = linear_20.reshape(1, 256, 3, 24, 128) + linear_20 = None + qkv_4 = reshape_8.permute(2, 0, 3, 1, 4) + reshape_8 = None + unbind_4 = qkv_4.unbind(0) + qkv_4 = None + q_4 = unbind_4[0] + k_4 = unbind_4[1] + v_4 = unbind_4[2] + unbind_4 = None + x_58 = torch._C._nn.scaled_dot_product_attention( + q_4, k_4, v_4, attn_mask=None, dropout_p=0.0 + ) + q_4 = k_4 = v_4 = None + transpose_5 = x_58.transpose(1, 2) + x_58 = None + x_59 = transpose_5.reshape(1, 256, 3072) + transpose_5 = None + x_60 = torch._C._nn.linear( + x_59, + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_59 = ( + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ + ) = None + x_61 = torch.nn.functional.dropout(x_60, 0.0, False, False) + x_60 = None + x_62 = x_56 + x_61 + x_56 = x_61 = None + x_63 = torch.rms_norm( + x_62, + (3072,), + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ = None + x_gate_4 = torch._C._nn.linear( + x_63, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_64 = torch._C._nn.linear( + x_63, + 
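# (the MLP half of every block is a bias-free SwiGLU; schematically, as a sketch rather than graph code: + # h = x + attn(rms_norm(x, w_norm1)) + # x = h + fc2(silu(fc1_g(rms_norm(h, w_norm2))) * fc1_x(rms_norm(h, w_norm2))) +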
l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_63 = ( + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_4 = torch.nn.functional.silu(x_gate_4, inplace=False) + x_gate_4 = None + x_65 = silu_4 * x_64 + silu_4 = x_64 = None + x_66 = torch.nn.functional.dropout(x_65, 0.0, False, False) + x_65 = None + x_67 = torch._C._nn.linear( + x_66, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_66 = ( + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_68 = torch.nn.functional.dropout(x_67, 0.0, False, False) + x_67 = None + x_69 = x_62 + x_68 + x_62 = x_68 = None + x_70 = torch.rms_norm( + x_69, + (3072,), + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ = None + linear_25 = torch._C._nn.linear( + x_70, + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_70 = ( + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_10 = linear_25.reshape(1, 256, 3, 24, 128) + linear_25 = None + qkv_5 = reshape_10.permute(2, 0, 3, 1, 4) + reshape_10 = None + unbind_5 = qkv_5.unbind(0) + qkv_5 = None + q_5 = unbind_5[0] + k_5 = unbind_5[1] + v_5 = unbind_5[2] + unbind_5 = None + x_71 = torch._C._nn.scaled_dot_product_attention( + q_5, k_5, v_5, attn_mask=None, dropout_p=0.0 + ) + q_5 = k_5 = v_5 = None + transpose_6 = x_71.transpose(1, 2) + x_71 = None + x_72 = transpose_6.reshape(1, 256, 3072) + transpose_6 = None + x_73 = torch._C._nn.linear( + x_72, + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_72 = ( + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ + ) = None + x_74 = torch.nn.functional.dropout(x_73, 0.0, False, False) + x_73 = None + x_75 = x_69 + x_74 + x_69 = x_74 = None + x_76 = torch.rms_norm( + x_75, + (3072,), + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ = None + x_gate_5 = torch._C._nn.linear( + x_76, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_77 = torch._C._nn.linear( + x_76, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_76 = ( + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_5 = torch.nn.functional.silu(x_gate_5, inplace=False) + x_gate_5 = None + x_78 = silu_5 * x_77 + silu_5 = x_77 = None + x_79 = torch.nn.functional.dropout(x_78, 0.0, False, False) + x_78 = None + x_80 = torch._C._nn.linear( + x_79, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_79 = ( + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_81 = torch.nn.functional.dropout(x_80, 0.0, False, False) + x_80 = None + x_82 = x_75 + x_81 + x_75 = x_81 = None + x_83 = torch.rms_norm( + x_82, + (3072,), + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ = None + linear_30 = torch._C._nn.linear( + x_83, + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_, + None, + 
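# (note the eager cleanup: each intermediate and weight local is rebound to None right after its last use, so buffers can be freed while the graph executes) +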
) + x_83 = ( + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_12 = linear_30.reshape(1, 256, 3, 24, 128) + linear_30 = None + qkv_6 = reshape_12.permute(2, 0, 3, 1, 4) + reshape_12 = None + unbind_6 = qkv_6.unbind(0) + qkv_6 = None + q_6 = unbind_6[0] + k_6 = unbind_6[1] + v_6 = unbind_6[2] + unbind_6 = None + x_84 = torch._C._nn.scaled_dot_product_attention( + q_6, k_6, v_6, attn_mask=None, dropout_p=0.0 + ) + q_6 = k_6 = v_6 = None + transpose_7 = x_84.transpose(1, 2) + x_84 = None + x_85 = transpose_7.reshape(1, 256, 3072) + transpose_7 = None + x_86 = torch._C._nn.linear( + x_85, + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_85 = ( + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ + ) = None + x_87 = torch.nn.functional.dropout(x_86, 0.0, False, False) + x_86 = None + x_88 = x_82 + x_87 + x_82 = x_87 = None + x_89 = torch.rms_norm( + x_88, + (3072,), + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ = None + x_gate_6 = torch._C._nn.linear( + x_89, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_90 = torch._C._nn.linear( + x_89, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_89 = ( + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_6 = torch.nn.functional.silu(x_gate_6, inplace=False) + x_gate_6 = None + x_91 = silu_6 * x_90 + silu_6 = x_90 = None + x_92 = torch.nn.functional.dropout(x_91, 0.0, False, False) + x_91 = None + x_93 = torch._C._nn.linear( + x_92, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_92 = ( + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_94 = torch.nn.functional.dropout(x_93, 0.0, False, False) + x_93 = None + x_95 = x_88 + x_94 + x_88 = x_94 = None + x_96 = torch.rms_norm( + x_95, + (3072,), + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ = None + linear_35 = torch._C._nn.linear( + x_96, + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_96 = ( + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_14 = linear_35.reshape(1, 256, 3, 24, 128) + linear_35 = None + qkv_7 = reshape_14.permute(2, 0, 3, 1, 4) + reshape_14 = None + unbind_7 = qkv_7.unbind(0) + qkv_7 = None + q_7 = unbind_7[0] + k_7 = unbind_7[1] + v_7 = unbind_7[2] + unbind_7 = None + x_97 = torch._C._nn.scaled_dot_product_attention( + q_7, k_7, v_7, attn_mask=None, dropout_p=0.0 + ) + q_7 = k_7 = v_7 = None + transpose_8 = x_97.transpose(1, 2) + x_97 = None + x_98 = transpose_8.reshape(1, 256, 3072) + transpose_8 = None + x_99 = torch._C._nn.linear( + x_98, + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_98 = ( + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ + ) = None + x_100 = torch.nn.functional.dropout(x_99, 0.0, False, False) + x_99 = None + x_101 = x_95 + x_100 + x_95 = x_100 = None + x_102 = torch.rms_norm( + x_101, + (3072,), + 
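# (normalization throughout is torch.rms_norm over the trailing 3072-dim axis with eps=1e-05 and a weight-only scale; the patch-embed conv carries the only bias in the graph) +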
l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ = None + x_gate_7 = torch._C._nn.linear( + x_102, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_103 = torch._C._nn.linear( + x_102, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_102 = ( + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_7 = torch.nn.functional.silu(x_gate_7, inplace=False) + x_gate_7 = None + x_104 = silu_7 * x_103 + silu_7 = x_103 = None + x_105 = torch.nn.functional.dropout(x_104, 0.0, False, False) + x_104 = None + x_106 = torch._C._nn.linear( + x_105, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_105 = ( + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_107 = torch.nn.functional.dropout(x_106, 0.0, False, False) + x_106 = None + x_108 = x_101 + x_107 + x_101 = x_107 = None + x_109 = torch.rms_norm( + x_108, + (3072,), + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ = None + linear_40 = torch._C._nn.linear( + x_109, + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_109 = ( + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_16 = linear_40.reshape(1, 256, 3, 24, 128) + linear_40 = None + qkv_8 = reshape_16.permute(2, 0, 3, 1, 4) + reshape_16 = None + unbind_8 = qkv_8.unbind(0) + qkv_8 = None + q_8 = unbind_8[0] + k_8 = unbind_8[1] + v_8 = unbind_8[2] + unbind_8 = None + x_110 = torch._C._nn.scaled_dot_product_attention( + q_8, k_8, v_8, attn_mask=None, dropout_p=0.0 + ) + q_8 = k_8 = v_8 = None + transpose_9 = x_110.transpose(1, 2) + x_110 = None + x_111 = transpose_9.reshape(1, 256, 3072) + transpose_9 = None + x_112 = torch._C._nn.linear( + x_111, + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_111 = ( + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ + ) = None + x_113 = torch.nn.functional.dropout(x_112, 0.0, False, False) + x_112 = None + x_114 = x_108 + x_113 + x_108 = x_113 = None + x_115 = torch.rms_norm( + x_114, + (3072,), + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ = None + x_gate_8 = torch._C._nn.linear( + x_115, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_116 = torch._C._nn.linear( + x_115, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_115 = ( + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_8 = torch.nn.functional.silu(x_gate_8, inplace=False) + x_gate_8 = None + x_117 = silu_8 * x_116 + silu_8 = x_116 = None + x_118 = torch.nn.functional.dropout(x_117, 0.0, False, False) + x_117 = None + x_119 = torch._C._nn.linear( + x_118, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_118 = ( + 
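# (every functional dropout in the trace runs with p=0.0, training=False, inplace=False, i.e. as a no-op kept over from the module structure at capture time) +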
l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_120 = torch.nn.functional.dropout(x_119, 0.0, False, False) + x_119 = None + x_121 = x_114 + x_120 + x_114 = x_120 = None + x_122 = torch.rms_norm( + x_121, + (3072,), + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ = None + linear_45 = torch._C._nn.linear( + x_122, + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_122 = ( + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_18 = linear_45.reshape(1, 256, 3, 24, 128) + linear_45 = None + qkv_9 = reshape_18.permute(2, 0, 3, 1, 4) + reshape_18 = None + unbind_9 = qkv_9.unbind(0) + qkv_9 = None + q_9 = unbind_9[0] + k_9 = unbind_9[1] + v_9 = unbind_9[2] + unbind_9 = None + x_123 = torch._C._nn.scaled_dot_product_attention( + q_9, k_9, v_9, attn_mask=None, dropout_p=0.0 + ) + q_9 = k_9 = v_9 = None + transpose_10 = x_123.transpose(1, 2) + x_123 = None + x_124 = transpose_10.reshape(1, 256, 3072) + transpose_10 = None + x_125 = torch._C._nn.linear( + x_124, + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_124 = ( + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ + ) = None + x_126 = torch.nn.functional.dropout(x_125, 0.0, False, False) + x_125 = None + x_127 = x_121 + x_126 + x_121 = x_126 = None + x_128 = torch.rms_norm( + x_127, + (3072,), + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ = None + x_gate_9 = torch._C._nn.linear( + x_128, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_129 = torch._C._nn.linear( + x_128, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_128 = ( + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_9 = torch.nn.functional.silu(x_gate_9, inplace=False) + x_gate_9 = None + x_130 = silu_9 * x_129 + silu_9 = x_129 = None + x_131 = torch.nn.functional.dropout(x_130, 0.0, False, False) + x_130 = None + x_132 = torch._C._nn.linear( + x_131, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_131 = ( + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_133 = torch.nn.functional.dropout(x_132, 0.0, False, False) + x_132 = None + x_134 = x_127 + x_133 + x_127 = x_133 = None + x_135 = torch.rms_norm( + x_134, + (3072,), + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ = None + linear_50 = torch._C._nn.linear( + x_135, + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_135 = ( + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_20 = linear_50.reshape(1, 256, 3, 24, 128) + linear_50 = None + qkv_10 = reshape_20.permute(2, 0, 3, 1, 4) + reshape_20 = None + unbind_10 = qkv_10.unbind(0) + qkv_10 = None + q_10 = unbind_10[0] + k_10 = unbind_10[1] + v_10 = unbind_10[2] + unbind_10 = None + x_136 = torch._C._nn.scaled_dot_product_attention( + q_10, k_10, v_10, attn_mask=None, 
dropout_p=0.0 + ) + q_10 = k_10 = v_10 = None + transpose_11 = x_136.transpose(1, 2) + x_136 = None + x_137 = transpose_11.reshape(1, 256, 3072) + transpose_11 = None + x_138 = torch._C._nn.linear( + x_137, + l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_137 = l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ = (None) + x_139 = torch.nn.functional.dropout(x_138, 0.0, False, False) + x_138 = None + x_140 = x_134 + x_139 + x_134 = x_139 = None + x_141 = torch.rms_norm( + x_140, + (3072,), + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ = None + x_gate_10 = torch._C._nn.linear( + x_141, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_142 = torch._C._nn.linear( + x_141, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_141 = l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_10 = torch.nn.functional.silu(x_gate_10, inplace=False) + x_gate_10 = None + x_143 = silu_10 * x_142 + silu_10 = x_142 = None + x_144 = torch.nn.functional.dropout(x_143, 0.0, False, False) + x_143 = None + x_145 = torch._C._nn.linear( + x_144, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_144 = ( + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_146 = torch.nn.functional.dropout(x_145, 0.0, False, False) + x_145 = None + x_147 = x_140 + x_146 + x_140 = x_146 = None + x_148 = torch.rms_norm( + x_147, + (3072,), + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ = None + linear_55 = torch._C._nn.linear( + x_148, + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_148 = ( + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_22 = linear_55.reshape(1, 256, 3, 24, 128) + linear_55 = None + qkv_11 = reshape_22.permute(2, 0, 3, 1, 4) + reshape_22 = None + unbind_11 = qkv_11.unbind(0) + qkv_11 = None + q_11 = unbind_11[0] + k_11 = unbind_11[1] + v_11 = unbind_11[2] + unbind_11 = None + x_149 = torch._C._nn.scaled_dot_product_attention( + q_11, k_11, v_11, attn_mask=None, dropout_p=0.0 + ) + q_11 = k_11 = v_11 = None + transpose_12 = x_149.transpose(1, 2) + x_149 = None + x_150 = transpose_12.reshape(1, 256, 3072) + transpose_12 = None + x_151 = torch._C._nn.linear( + x_150, + l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_150 = l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ = (None) + x_152 = torch.nn.functional.dropout(x_151, 0.0, False, False) + x_151 = None + x_153 = x_147 + x_152 + x_147 = x_152 = None + x_154 = torch.rms_norm( + x_153, + (3072,), + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ = None + x_gate_11 = torch._C._nn.linear( + x_154, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_155 = torch._C._nn.linear( + x_154, 
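+ # (blocks 11 through 23 repeat the identical attention + SwiGLU pattern; after block 23 the graph presumably closes with the final RMSNorm and whatever pooling consumes the SymInt prefix-token count from the signature)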
+ l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_154 = l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_11 = torch.nn.functional.silu(x_gate_11, inplace=False) + x_gate_11 = None + x_156 = silu_11 * x_155 + silu_11 = x_155 = None + x_157 = torch.nn.functional.dropout(x_156, 0.0, False, False) + x_156 = None + x_158 = torch._C._nn.linear( + x_157, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_157 = ( + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_159 = torch.nn.functional.dropout(x_158, 0.0, False, False) + x_158 = None + x_160 = x_153 + x_159 + x_153 = x_159 = None + x_161 = torch.rms_norm( + x_160, + (3072,), + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ = None + linear_60 = torch._C._nn.linear( + x_161, + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_161 = ( + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_24 = linear_60.reshape(1, 256, 3, 24, 128) + linear_60 = None + qkv_12 = reshape_24.permute(2, 0, 3, 1, 4) + reshape_24 = None + unbind_12 = qkv_12.unbind(0) + qkv_12 = None + q_12 = unbind_12[0] + k_12 = unbind_12[1] + v_12 = unbind_12[2] + unbind_12 = None + x_162 = torch._C._nn.scaled_dot_product_attention( + q_12, k_12, v_12, attn_mask=None, dropout_p=0.0 + ) + q_12 = k_12 = v_12 = None + transpose_13 = x_162.transpose(1, 2) + x_162 = None + x_163 = transpose_13.reshape(1, 256, 3072) + transpose_13 = None + x_164 = torch._C._nn.linear( + x_163, + l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_163 = l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ = (None) + x_165 = torch.nn.functional.dropout(x_164, 0.0, False, False) + x_164 = None + x_166 = x_160 + x_165 + x_160 = x_165 = None + x_167 = torch.rms_norm( + x_166, + (3072,), + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ = None + x_gate_12 = torch._C._nn.linear( + x_167, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_168 = torch._C._nn.linear( + x_167, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_167 = l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_12 = torch.nn.functional.silu(x_gate_12, inplace=False) + x_gate_12 = None + x_169 = silu_12 * x_168 + silu_12 = x_168 = None + x_170 = torch.nn.functional.dropout(x_169, 0.0, False, False) + x_169 = None + x_171 = torch._C._nn.linear( + x_170, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_170 = ( + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_172 = torch.nn.functional.dropout(x_171, 0.0, False, False) + x_171 = None + x_173 = x_166 + x_172 + x_166 = x_172 = None + x_174 = torch.rms_norm( + x_173, + (3072,), + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ = None + linear_65 = torch._C._nn.linear( 
+ x_174, + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_174 = ( + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_26 = linear_65.reshape(1, 256, 3, 24, 128) + linear_65 = None + qkv_13 = reshape_26.permute(2, 0, 3, 1, 4) + reshape_26 = None + unbind_13 = qkv_13.unbind(0) + qkv_13 = None + q_13 = unbind_13[0] + k_13 = unbind_13[1] + v_13 = unbind_13[2] + unbind_13 = None + x_175 = torch._C._nn.scaled_dot_product_attention( + q_13, k_13, v_13, attn_mask=None, dropout_p=0.0 + ) + q_13 = k_13 = v_13 = None + transpose_14 = x_175.transpose(1, 2) + x_175 = None + x_176 = transpose_14.reshape(1, 256, 3072) + transpose_14 = None + x_177 = torch._C._nn.linear( + x_176, + l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_176 = l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ = (None) + x_178 = torch.nn.functional.dropout(x_177, 0.0, False, False) + x_177 = None + x_179 = x_173 + x_178 + x_173 = x_178 = None + x_180 = torch.rms_norm( + x_179, + (3072,), + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ = None + x_gate_13 = torch._C._nn.linear( + x_180, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_181 = torch._C._nn.linear( + x_180, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_180 = l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_13 = torch.nn.functional.silu(x_gate_13, inplace=False) + x_gate_13 = None + x_182 = silu_13 * x_181 + silu_13 = x_181 = None + x_183 = torch.nn.functional.dropout(x_182, 0.0, False, False) + x_182 = None + x_184 = torch._C._nn.linear( + x_183, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_183 = ( + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_185 = torch.nn.functional.dropout(x_184, 0.0, False, False) + x_184 = None + x_186 = x_179 + x_185 + x_179 = x_185 = None + x_187 = torch.rms_norm( + x_186, + (3072,), + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ = None + linear_70 = torch._C._nn.linear( + x_187, + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_187 = ( + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_28 = linear_70.reshape(1, 256, 3, 24, 128) + linear_70 = None + qkv_14 = reshape_28.permute(2, 0, 3, 1, 4) + reshape_28 = None + unbind_14 = qkv_14.unbind(0) + qkv_14 = None + q_14 = unbind_14[0] + k_14 = unbind_14[1] + v_14 = unbind_14[2] + unbind_14 = None + x_188 = torch._C._nn.scaled_dot_product_attention( + q_14, k_14, v_14, attn_mask=None, dropout_p=0.0 + ) + q_14 = k_14 = v_14 = None + transpose_15 = x_188.transpose(1, 2) + x_188 = None + x_189 = transpose_15.reshape(1, 256, 3072) + transpose_15 = None + x_190 = torch._C._nn.linear( + x_189, + l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_189 = l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ = (None) + x_191 = 
torch.nn.functional.dropout(x_190, 0.0, False, False) + x_190 = None + x_192 = x_186 + x_191 + x_186 = x_191 = None + x_193 = torch.rms_norm( + x_192, + (3072,), + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ = None + x_gate_14 = torch._C._nn.linear( + x_193, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_194 = torch._C._nn.linear( + x_193, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_193 = l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_14 = torch.nn.functional.silu(x_gate_14, inplace=False) + x_gate_14 = None + x_195 = silu_14 * x_194 + silu_14 = x_194 = None + x_196 = torch.nn.functional.dropout(x_195, 0.0, False, False) + x_195 = None + x_197 = torch._C._nn.linear( + x_196, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_196 = ( + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_198 = torch.nn.functional.dropout(x_197, 0.0, False, False) + x_197 = None + x_199 = x_192 + x_198 + x_192 = x_198 = None + x_200 = torch.rms_norm( + x_199, + (3072,), + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ = None + linear_75 = torch._C._nn.linear( + x_200, + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_200 = ( + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_30 = linear_75.reshape(1, 256, 3, 24, 128) + linear_75 = None + qkv_15 = reshape_30.permute(2, 0, 3, 1, 4) + reshape_30 = None + unbind_15 = qkv_15.unbind(0) + qkv_15 = None + q_15 = unbind_15[0] + k_15 = unbind_15[1] + v_15 = unbind_15[2] + unbind_15 = None + x_201 = torch._C._nn.scaled_dot_product_attention( + q_15, k_15, v_15, attn_mask=None, dropout_p=0.0 + ) + q_15 = k_15 = v_15 = None + transpose_16 = x_201.transpose(1, 2) + x_201 = None + x_202 = transpose_16.reshape(1, 256, 3072) + transpose_16 = None + x_203 = torch._C._nn.linear( + x_202, + l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_202 = l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ = (None) + x_204 = torch.nn.functional.dropout(x_203, 0.0, False, False) + x_203 = None + x_205 = x_199 + x_204 + x_199 = x_204 = None + x_206 = torch.rms_norm( + x_205, + (3072,), + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ = None + x_gate_15 = torch._C._nn.linear( + x_206, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_207 = torch._C._nn.linear( + x_206, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_206 = l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_15 = torch.nn.functional.silu(x_gate_15, inplace=False) + x_gate_15 = None + x_208 = silu_15 * x_207 + silu_15 = x_207 = None + x_209 = torch.nn.functional.dropout(x_208, 0.0, False, False) + x_208 = None + 
x_210 = torch._C._nn.linear( + x_209, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_209 = ( + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_211 = torch.nn.functional.dropout(x_210, 0.0, False, False) + x_210 = None + x_212 = x_205 + x_211 + x_205 = x_211 = None + x_213 = torch.rms_norm( + x_212, + (3072,), + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ = None + linear_80 = torch._C._nn.linear( + x_213, + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_213 = ( + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_32 = linear_80.reshape(1, 256, 3, 24, 128) + linear_80 = None + qkv_16 = reshape_32.permute(2, 0, 3, 1, 4) + reshape_32 = None + unbind_16 = qkv_16.unbind(0) + qkv_16 = None + q_16 = unbind_16[0] + k_16 = unbind_16[1] + v_16 = unbind_16[2] + unbind_16 = None + x_214 = torch._C._nn.scaled_dot_product_attention( + q_16, k_16, v_16, attn_mask=None, dropout_p=0.0 + ) + q_16 = k_16 = v_16 = None + transpose_17 = x_214.transpose(1, 2) + x_214 = None + x_215 = transpose_17.reshape(1, 256, 3072) + transpose_17 = None + x_216 = torch._C._nn.linear( + x_215, + l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_215 = l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ = (None) + x_217 = torch.nn.functional.dropout(x_216, 0.0, False, False) + x_216 = None + x_218 = x_212 + x_217 + x_212 = x_217 = None + x_219 = torch.rms_norm( + x_218, + (3072,), + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ = None + x_gate_16 = torch._C._nn.linear( + x_219, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_220 = torch._C._nn.linear( + x_219, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_219 = l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_16 = torch.nn.functional.silu(x_gate_16, inplace=False) + x_gate_16 = None + x_221 = silu_16 * x_220 + silu_16 = x_220 = None + x_222 = torch.nn.functional.dropout(x_221, 0.0, False, False) + x_221 = None + x_223 = torch._C._nn.linear( + x_222, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_222 = ( + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_224 = torch.nn.functional.dropout(x_223, 0.0, False, False) + x_223 = None + x_225 = x_218 + x_224 + x_218 = x_224 = None + x_226 = torch.rms_norm( + x_225, + (3072,), + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ = None + linear_85 = torch._C._nn.linear( + x_226, + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_226 = ( + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_34 = linear_85.reshape(1, 256, 3, 24, 128) + linear_85 = None + qkv_17 = reshape_34.permute(2, 0, 3, 1, 4) + reshape_34 = None + unbind_17 = qkv_17.unbind(0) + qkv_17 = None + q_17 = 
unbind_17[0] + k_17 = unbind_17[1] + v_17 = unbind_17[2] + unbind_17 = None + x_227 = torch._C._nn.scaled_dot_product_attention( + q_17, k_17, v_17, attn_mask=None, dropout_p=0.0 + ) + q_17 = k_17 = v_17 = None + transpose_18 = x_227.transpose(1, 2) + x_227 = None + x_228 = transpose_18.reshape(1, 256, 3072) + transpose_18 = None + x_229 = torch._C._nn.linear( + x_228, + l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_228 = l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ = (None) + x_230 = torch.nn.functional.dropout(x_229, 0.0, False, False) + x_229 = None + x_231 = x_225 + x_230 + x_225 = x_230 = None + x_232 = torch.rms_norm( + x_231, + (3072,), + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ = None + x_gate_17 = torch._C._nn.linear( + x_232, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_233 = torch._C._nn.linear( + x_232, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_232 = l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_17 = torch.nn.functional.silu(x_gate_17, inplace=False) + x_gate_17 = None + x_234 = silu_17 * x_233 + silu_17 = x_233 = None + x_235 = torch.nn.functional.dropout(x_234, 0.0, False, False) + x_234 = None + x_236 = torch._C._nn.linear( + x_235, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_235 = ( + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_237 = torch.nn.functional.dropout(x_236, 0.0, False, False) + x_236 = None + x_238 = x_231 + x_237 + x_231 = x_237 = None + x_239 = torch.rms_norm( + x_238, + (3072,), + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ = None + linear_90 = torch._C._nn.linear( + x_239, + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_239 = ( + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_36 = linear_90.reshape(1, 256, 3, 24, 128) + linear_90 = None + qkv_18 = reshape_36.permute(2, 0, 3, 1, 4) + reshape_36 = None + unbind_18 = qkv_18.unbind(0) + qkv_18 = None + q_18 = unbind_18[0] + k_18 = unbind_18[1] + v_18 = unbind_18[2] + unbind_18 = None + x_240 = torch._C._nn.scaled_dot_product_attention( + q_18, k_18, v_18, attn_mask=None, dropout_p=0.0 + ) + q_18 = k_18 = v_18 = None + transpose_19 = x_240.transpose(1, 2) + x_240 = None + x_241 = transpose_19.reshape(1, 256, 3072) + transpose_19 = None + x_242 = torch._C._nn.linear( + x_241, + l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_241 = l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ = (None) + x_243 = torch.nn.functional.dropout(x_242, 0.0, False, False) + x_242 = None + x_244 = x_238 + x_243 + x_238 = x_243 = None + x_245 = torch.rms_norm( + x_244, + (3072,), + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ = None + x_gate_18 = torch._C._nn.linear( + x_245, + 
l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_246 = torch._C._nn.linear( + x_245, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_245 = l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_18 = torch.nn.functional.silu(x_gate_18, inplace=False) + x_gate_18 = None + x_247 = silu_18 * x_246 + silu_18 = x_246 = None + x_248 = torch.nn.functional.dropout(x_247, 0.0, False, False) + x_247 = None + x_249 = torch._C._nn.linear( + x_248, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_248 = ( + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_250 = torch.nn.functional.dropout(x_249, 0.0, False, False) + x_249 = None + x_251 = x_244 + x_250 + x_244 = x_250 = None + x_252 = torch.rms_norm( + x_251, + (3072,), + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ = None + linear_95 = torch._C._nn.linear( + x_252, + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_252 = ( + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_38 = linear_95.reshape(1, 256, 3, 24, 128) + linear_95 = None + qkv_19 = reshape_38.permute(2, 0, 3, 1, 4) + reshape_38 = None + unbind_19 = qkv_19.unbind(0) + qkv_19 = None + q_19 = unbind_19[0] + k_19 = unbind_19[1] + v_19 = unbind_19[2] + unbind_19 = None + x_253 = torch._C._nn.scaled_dot_product_attention( + q_19, k_19, v_19, attn_mask=None, dropout_p=0.0 + ) + q_19 = k_19 = v_19 = None + transpose_20 = x_253.transpose(1, 2) + x_253 = None + x_254 = transpose_20.reshape(1, 256, 3072) + transpose_20 = None + x_255 = torch._C._nn.linear( + x_254, + l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_254 = l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ = (None) + x_256 = torch.nn.functional.dropout(x_255, 0.0, False, False) + x_255 = None + x_257 = x_251 + x_256 + x_251 = x_256 = None + x_258 = torch.rms_norm( + x_257, + (3072,), + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ = None + x_gate_19 = torch._C._nn.linear( + x_258, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_259 = torch._C._nn.linear( + x_258, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_258 = l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_19 = torch.nn.functional.silu(x_gate_19, inplace=False) + x_gate_19 = None + x_260 = silu_19 * x_259 + silu_19 = x_259 = None + x_261 = torch.nn.functional.dropout(x_260, 0.0, False, False) + x_260 = None + x_262 = torch._C._nn.linear( + x_261, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_261 = ( + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_263 = torch.nn.functional.dropout(x_262, 0.0, False, False) + x_262 = None + x_264 = x_257 + x_263 + x_257 = x_263 = None + x_265 = 
torch.rms_norm( + x_264, + (3072,), + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ = None + linear_100 = torch._C._nn.linear( + x_265, + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_265 = ( + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_40 = linear_100.reshape(1, 256, 3, 24, 128) + linear_100 = None + qkv_20 = reshape_40.permute(2, 0, 3, 1, 4) + reshape_40 = None + unbind_20 = qkv_20.unbind(0) + qkv_20 = None + q_20 = unbind_20[0] + k_20 = unbind_20[1] + v_20 = unbind_20[2] + unbind_20 = None + x_266 = torch._C._nn.scaled_dot_product_attention( + q_20, k_20, v_20, attn_mask=None, dropout_p=0.0 + ) + q_20 = k_20 = v_20 = None + transpose_21 = x_266.transpose(1, 2) + x_266 = None + x_267 = transpose_21.reshape(1, 256, 3072) + transpose_21 = None + x_268 = torch._C._nn.linear( + x_267, + l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_267 = l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ = (None) + x_269 = torch.nn.functional.dropout(x_268, 0.0, False, False) + x_268 = None + x_270 = x_264 + x_269 + x_264 = x_269 = None + x_271 = torch.rms_norm( + x_270, + (3072,), + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ = None + x_gate_20 = torch._C._nn.linear( + x_271, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_272 = torch._C._nn.linear( + x_271, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_271 = l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_20 = torch.nn.functional.silu(x_gate_20, inplace=False) + x_gate_20 = None + x_273 = silu_20 * x_272 + silu_20 = x_272 = None + x_274 = torch.nn.functional.dropout(x_273, 0.0, False, False) + x_273 = None + x_275 = torch._C._nn.linear( + x_274, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_274 = ( + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_276 = torch.nn.functional.dropout(x_275, 0.0, False, False) + x_275 = None + x_277 = x_270 + x_276 + x_270 = x_276 = None + x_278 = torch.rms_norm( + x_277, + (3072,), + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ = None + linear_105 = torch._C._nn.linear( + x_278, + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_278 = ( + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_42 = linear_105.reshape(1, 256, 3, 24, 128) + linear_105 = None + qkv_21 = reshape_42.permute(2, 0, 3, 1, 4) + reshape_42 = None + unbind_21 = qkv_21.unbind(0) + qkv_21 = None + q_21 = unbind_21[0] + k_21 = unbind_21[1] + v_21 = unbind_21[2] + unbind_21 = None + x_279 = torch._C._nn.scaled_dot_product_attention( + q_21, k_21, v_21, attn_mask=None, dropout_p=0.0 + ) + q_21 = k_21 = v_21 = None + transpose_22 = x_279.transpose(1, 2) + x_279 = None + x_280 = transpose_22.reshape(1, 256, 3072) + transpose_22 = None + x_281 = torch._C._nn.linear( + 
x_280, + l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_280 = l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ = (None) + x_282 = torch.nn.functional.dropout(x_281, 0.0, False, False) + x_281 = None + x_283 = x_277 + x_282 + x_277 = x_282 = None + x_284 = torch.rms_norm( + x_283, + (3072,), + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ = None + x_gate_21 = torch._C._nn.linear( + x_284, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_285 = torch._C._nn.linear( + x_284, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_284 = l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_21 = torch.nn.functional.silu(x_gate_21, inplace=False) + x_gate_21 = None + x_286 = silu_21 * x_285 + silu_21 = x_285 = None + x_287 = torch.nn.functional.dropout(x_286, 0.0, False, False) + x_286 = None + x_288 = torch._C._nn.linear( + x_287, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_287 = ( + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_289 = torch.nn.functional.dropout(x_288, 0.0, False, False) + x_288 = None + x_290 = x_283 + x_289 + x_283 = x_289 = None + x_291 = torch.rms_norm( + x_290, + (3072,), + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ = None + linear_110 = torch._C._nn.linear( + x_291, + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_291 = ( + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_44 = linear_110.reshape(1, 256, 3, 24, 128) + linear_110 = None + qkv_22 = reshape_44.permute(2, 0, 3, 1, 4) + reshape_44 = None + unbind_22 = qkv_22.unbind(0) + qkv_22 = None + q_22 = unbind_22[0] + k_22 = unbind_22[1] + v_22 = unbind_22[2] + unbind_22 = None + x_292 = torch._C._nn.scaled_dot_product_attention( + q_22, k_22, v_22, attn_mask=None, dropout_p=0.0 + ) + q_22 = k_22 = v_22 = None + transpose_23 = x_292.transpose(1, 2) + x_292 = None + x_293 = transpose_23.reshape(1, 256, 3072) + transpose_23 = None + x_294 = torch._C._nn.linear( + x_293, + l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_293 = l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ = (None) + x_295 = torch.nn.functional.dropout(x_294, 0.0, False, False) + x_294 = None + x_296 = x_290 + x_295 + x_290 = x_295 = None + x_297 = torch.rms_norm( + x_296, + (3072,), + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ = None + x_gate_22 = torch._C._nn.linear( + x_297, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_298 = torch._C._nn.linear( + x_297, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_297 = l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + 
silu_22 = torch.nn.functional.silu(x_gate_22, inplace=False) + x_gate_22 = None + x_299 = silu_22 * x_298 + silu_22 = x_298 = None + x_300 = torch.nn.functional.dropout(x_299, 0.0, False, False) + x_299 = None + x_301 = torch._C._nn.linear( + x_300, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_300 = ( + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_302 = torch.nn.functional.dropout(x_301, 0.0, False, False) + x_301 = None + x_303 = x_296 + x_302 + x_296 = x_302 = None + x_304 = torch.rms_norm( + x_303, + (3072,), + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ = None + linear_115 = torch._C._nn.linear( + x_304, + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_304 = ( + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_46 = linear_115.reshape(1, 256, 3, 24, 128) + linear_115 = None + qkv_23 = reshape_46.permute(2, 0, 3, 1, 4) + reshape_46 = None + unbind_23 = qkv_23.unbind(0) + qkv_23 = None + q_23 = unbind_23[0] + k_23 = unbind_23[1] + v_23 = unbind_23[2] + unbind_23 = None + x_305 = torch._C._nn.scaled_dot_product_attention( + q_23, k_23, v_23, attn_mask=None, dropout_p=0.0 + ) + q_23 = k_23 = v_23 = None + transpose_24 = x_305.transpose(1, 2) + x_305 = None + x_306 = transpose_24.reshape(1, 256, 3072) + transpose_24 = None + x_307 = torch._C._nn.linear( + x_306, + l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_306 = l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ = (None) + x_308 = torch.nn.functional.dropout(x_307, 0.0, False, False) + x_307 = None + x_309 = x_303 + x_308 + x_303 = x_308 = None + x_310 = torch.rms_norm( + x_309, + (3072,), + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ = None + x_gate_23 = torch._C._nn.linear( + x_310, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_311 = torch._C._nn.linear( + x_310, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_310 = l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_23 = torch.nn.functional.silu(x_gate_23, inplace=False) + x_gate_23 = None + x_312 = silu_23 * x_311 + silu_23 = x_311 = None + x_313 = torch.nn.functional.dropout(x_312, 0.0, False, False) + x_312 = None + x_314 = torch._C._nn.linear( + x_313, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_313 = ( + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_315 = torch.nn.functional.dropout(x_314, 0.0, False, False) + x_314 = None + x_316 = x_309 + x_315 + x_309 = x_315 = None + x_317 = torch.rms_norm( + x_316, (3072,), l_self_modules_norm_parameters_weight_, 1e-05 + ) + x_316 = l_self_modules_norm_parameters_weight_ = None + x_318 = x_317[ + (slice(None, None, None), slice(l_self_num_prefix_tokens, None, None)) + ] + x_317 = l_self_num_prefix_tokens = None + x_319 = x_318.mean(dim=1) + x_318 = None + x_320 = torch.nn.functional.dropout(x_319, 0.0, False, False) + x_319 = None + return (x_320,) 
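# --- Editor's note (not part of the patch) ------------------------------
# The unrolled Dynamo trace above repeats one pattern per transformer
# block: rms_norm -> bias-free QKV linear -> scaled_dot_product_attention
# -> bias-free proj -> residual add, then rms_norm -> SwiGLU MLP
# (silu(fc1_g) * fc1_x -> fc2) -> residual add; the tail applies a final
# rms_norm, drops the prefix tokens, and mean-pools over the sequence.
# Below is a minimal reference sketch of that per-block computation,
# assuming dim=3072, num_heads=24, and mlp_hidden=8192 as in the trace;
# the class and attribute names are illustrative, not from the sample.
import torch
import torch.nn.functional as F


class AIMv2BlockSketch(torch.nn.Module):
    def __init__(self, dim=3072, num_heads=24, mlp_hidden=8192):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.norm1 = torch.nn.RMSNorm(dim, eps=1e-5)
        self.qkv = torch.nn.Linear(dim, 3 * dim, bias=False)
        self.proj = torch.nn.Linear(dim, dim, bias=False)
        self.norm2 = torch.nn.RMSNorm(dim, eps=1e-5)
        self.fc1_g = torch.nn.Linear(dim, mlp_hidden, bias=False)  # gate branch
        self.fc1_x = torch.nn.Linear(dim, mlp_hidden, bias=False)  # value branch
        self.fc2 = torch.nn.Linear(mlp_hidden, dim, bias=False)

    def forward(self, x):
        B, N, C = x.shape
        # Attention: qkv -> reshape -> permute -> unbind -> SDPA, as traced.
        qkv = self.qkv(self.norm1(x)).reshape(B, N, 3, self.num_heads, self.head_dim)
        q, k, v = qkv.permute(2, 0, 3, 1, 4).unbind(0)
        attn = F.scaled_dot_product_attention(q, k, v)
        x = x + self.proj(attn.transpose(1, 2).reshape(B, N, C))
        # SwiGLU MLP: silu(gate) * value -> fc2, as traced.
        h = self.norm2(x)
        return x + self.fc2(F.silu(self.fc1_g(h)) * self.fc1_x(h))
# ------------------------------------------------------------------------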
diff --git a/samples/timm/aimv2_3b_patch14_224.apple_pt/weight_meta.py b/samples/timm/aimv2_3b_patch14_224.apple_pt/weight_meta.py new file mode 100644 index 000000000..58803ef83 --- /dev/null +++ b/samples/timm/aimv2_3b_patch14_224.apple_pt/weight_meta.py @@ -0,0 +1,1920 @@ +class Program_weight_tensor_meta_L_x_: + name = "L_x_" + shape = [1, 3, 224, 224] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.227 + std = 1.285 + data = None + + +class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_weight_: + name = "L_self_modules_patch_embed_modules_proj_parameters_weight_" + shape = [3072, 3, 14, 14] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.024 + data = None + + +class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_bias_: + name = "L_self_modules_patch_embed_modules_proj_parameters_bias_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.024 + data = None + + +class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_norm_parameters_weight_: + name = "L_self_modules_patch_embed_modules_norm_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_parameters_pos_embed_: + name = "L_self_parameters_pos_embed_" + shape = [1, 256, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_: + name = 
"L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 
0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = 
"cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_" + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] 
+ dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + 
+ +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_: + 
name = "L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_" + shape = [3072] + dtype = 
"torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_: 
+ name = ( + "L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_" + shape 
= [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [9216, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_" + ) + shape = [3072, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [8192, 3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_" + shape = [3072, 8192] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_norm_parameters_weight_: + name = "L_self_modules_norm_parameters_weight_" + shape = [3072] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_num_prefix_tokens: + name = "L_self_num_prefix_tokens" + shape = [] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + data = [4] diff --git a/samples/timm/aimv2_huge_patch14_224.apple_pt/graph_hash.txt b/samples/timm/aimv2_huge_patch14_224.apple_pt/graph_hash.txt new file mode 100644 index 000000000..644cdd433 --- /dev/null +++ b/samples/timm/aimv2_huge_patch14_224.apple_pt/graph_hash.txt @@ -0,0 +1 @@ +d10372439b472d0f72f1ff88c60f8fbaf8b8dc0d8a0773c79e34ddee6c2cc78d \ No newline at end of file diff --git a/samples/timm/aimv2_huge_patch14_224.apple_pt/graph_net.json b/samples/timm/aimv2_huge_patch14_224.apple_pt/graph_net.json new file mode 100644 index 000000000..1373fe3b5 --- /dev/null +++ b/samples/timm/aimv2_huge_patch14_224.apple_pt/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/samples/timm/aimv2_huge_patch14_224.apple_pt/input_meta.py b/samples/timm/aimv2_huge_patch14_224.apple_pt/input_meta.py new file mode 100644 index 000000000..e69de29bb diff --git a/samples/timm/aimv2_huge_patch14_224.apple_pt/input_tensor_constraints.py b/samples/timm/aimv2_huge_patch14_224.apple_pt/input_tensor_constraints.py new file mode 100644 index 000000000..e69de29bb diff --git a/samples/timm/aimv2_huge_patch14_224.apple_pt/model.py b/samples/timm/aimv2_huge_patch14_224.apple_pt/model.py new file mode 100644 index 000000000..ce2db14af --- /dev/null +++ b/samples/timm/aimv2_huge_patch14_224.apple_pt/model.py @@ -0,0 +1,2663 @@ +import torch + + +class GraphModule(torch.nn.Module): + def forward( + self, + L_x_: torch.Tensor, + L_self_modules_patch_embed_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_patch_embed_modules_proj_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_patch_embed_modules_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_parameters_pos_embed_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, 
+ L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_num_prefix_tokens: torch.SymInt, + ): + l_x_ = L_x_ + l_self_modules_patch_embed_modules_proj_parameters_weight_ = ( + L_self_modules_patch_embed_modules_proj_parameters_weight_ + ) + l_self_modules_patch_embed_modules_proj_parameters_bias_ = ( + L_self_modules_patch_embed_modules_proj_parameters_bias_ + ) + l_self_modules_patch_embed_modules_norm_parameters_weight_ = ( + L_self_modules_patch_embed_modules_norm_parameters_weight_ + ) + l_self_parameters_pos_embed_ = L_self_parameters_pos_embed_ + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ = ( + 
L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ = ( + 
L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ = ( + 
L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ + ) + 
l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ = 
L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ + ) + 
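+ # Each of the 24 blocks contributes the same seven weight tensors, all used
+ # without biases below: norm1, attn.qkv, attn.proj, norm2, mlp.fc1_g,
+ # mlp.fc1_x, mlp.fc2. The only bias in the whole graph is patch_embed.proj's.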
l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ = 
L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_norm_parameters_weight_ = L_self_modules_norm_parameters_weight_ + l_self_num_prefix_tokens = L_self_num_prefix_tokens + x = torch.conv2d( + l_x_, + l_self_modules_patch_embed_modules_proj_parameters_weight_, + l_self_modules_patch_embed_modules_proj_parameters_bias_, + (14, 14), + (0, 0), + (1, 1), + 1, + ) + l_x_ = ( + l_self_modules_patch_embed_modules_proj_parameters_weight_ + ) = l_self_modules_patch_embed_modules_proj_parameters_bias_ = None + flatten = x.flatten(2) + x = None + x_1 = flatten.transpose(1, 2) + flatten = None + x_2 = torch.rms_norm( + x_1, + (1536,), + l_self_modules_patch_embed_modules_norm_parameters_weight_, + 1e-05, + ) + x_1 = l_self_modules_patch_embed_modules_norm_parameters_weight_ = None + x_3 = x_2 + l_self_parameters_pos_embed_ + x_2 = l_self_parameters_pos_embed_ = None + x_4 = torch.nn.functional.dropout(x_3, 0.0, False, False) + x_3 = None + x_5 = torch.rms_norm( + x_4, + (1536,), + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ = None + linear = 
torch._C._nn.linear( + x_5, + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_5 = ( + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape = linear.reshape(1, 256, 3, 12, 128) + linear = None + qkv = reshape.permute(2, 0, 3, 1, 4) + reshape = None + unbind = qkv.unbind(0) + qkv = None + q = unbind[0] + k = unbind[1] + v = unbind[2] + unbind = None + x_6 = torch._C._nn.scaled_dot_product_attention( + q, k, v, attn_mask=None, dropout_p=0.0 + ) + q = k = v = None + transpose_1 = x_6.transpose(1, 2) + x_6 = None + x_7 = transpose_1.reshape(1, 256, 1536) + transpose_1 = None + x_8 = torch._C._nn.linear( + x_7, + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_7 = ( + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ + ) = None + x_9 = torch.nn.functional.dropout(x_8, 0.0, False, False) + x_8 = None + x_10 = x_4 + x_9 + x_4 = x_9 = None + x_11 = torch.rms_norm( + x_10, + (1536,), + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ = None + x_gate = torch._C._nn.linear( + x_11, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_12 = torch._C._nn.linear( + x_11, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_11 = ( + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu = torch.nn.functional.silu(x_gate, inplace=False) + x_gate = None + x_13 = silu * x_12 + silu = x_12 = None + x_14 = torch.nn.functional.dropout(x_13, 0.0, False, False) + x_13 = None + x_15 = torch._C._nn.linear( + x_14, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_14 = ( + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_16 = torch.nn.functional.dropout(x_15, 0.0, False, False) + x_15 = None + x_17 = x_10 + x_16 + x_10 = x_16 = None + x_18 = torch.rms_norm( + x_17, + (1536,), + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ = None + linear_5 = torch._C._nn.linear( + x_18, + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_18 = ( + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_2 = linear_5.reshape(1, 256, 3, 12, 128) + linear_5 = None + qkv_1 = reshape_2.permute(2, 0, 3, 1, 4) + reshape_2 = None + unbind_1 = qkv_1.unbind(0) + qkv_1 = None + q_1 = unbind_1[0] + k_1 = unbind_1[1] + v_1 = unbind_1[2] + unbind_1 = None + x_19 = torch._C._nn.scaled_dot_product_attention( + q_1, k_1, v_1, attn_mask=None, dropout_p=0.0 + ) + q_1 = k_1 = v_1 = None + transpose_2 = x_19.transpose(1, 2) + x_19 = None + x_20 = transpose_2.reshape(1, 256, 1536) + transpose_2 = None + x_21 = torch._C._nn.linear( + x_20, + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_20 = ( + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ + ) = None + x_22 = torch.nn.functional.dropout(x_21, 0.0, False, False) + x_21 = None + x_23 = x_17 + x_22 + x_17 = x_22 = None + x_24 = torch.rms_norm( + x_23, + (1536,), + 
l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ = None + x_gate_1 = torch._C._nn.linear( + x_24, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_25 = torch._C._nn.linear( + x_24, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_24 = ( + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_1 = torch.nn.functional.silu(x_gate_1, inplace=False) + x_gate_1 = None + x_26 = silu_1 * x_25 + silu_1 = x_25 = None + x_27 = torch.nn.functional.dropout(x_26, 0.0, False, False) + x_26 = None + x_28 = torch._C._nn.linear( + x_27, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_27 = ( + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_29 = torch.nn.functional.dropout(x_28, 0.0, False, False) + x_28 = None + x_30 = x_23 + x_29 + x_23 = x_29 = None + x_31 = torch.rms_norm( + x_30, + (1536,), + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ = None + linear_10 = torch._C._nn.linear( + x_31, + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_31 = ( + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_4 = linear_10.reshape(1, 256, 3, 12, 128) + linear_10 = None + qkv_2 = reshape_4.permute(2, 0, 3, 1, 4) + reshape_4 = None + unbind_2 = qkv_2.unbind(0) + qkv_2 = None + q_2 = unbind_2[0] + k_2 = unbind_2[1] + v_2 = unbind_2[2] + unbind_2 = None + x_32 = torch._C._nn.scaled_dot_product_attention( + q_2, k_2, v_2, attn_mask=None, dropout_p=0.0 + ) + q_2 = k_2 = v_2 = None + transpose_3 = x_32.transpose(1, 2) + x_32 = None + x_33 = transpose_3.reshape(1, 256, 1536) + transpose_3 = None + x_34 = torch._C._nn.linear( + x_33, + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_33 = ( + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ + ) = None + x_35 = torch.nn.functional.dropout(x_34, 0.0, False, False) + x_34 = None + x_36 = x_30 + x_35 + x_30 = x_35 = None + x_37 = torch.rms_norm( + x_36, + (1536,), + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ = None + x_gate_2 = torch._C._nn.linear( + x_37, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_38 = torch._C._nn.linear( + x_37, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_37 = ( + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_2 = torch.nn.functional.silu(x_gate_2, inplace=False) + x_gate_2 = None + x_39 = silu_2 * x_38 + silu_2 = x_38 = None + x_40 = torch.nn.functional.dropout(x_39, 0.0, False, False) + x_39 = None + x_41 = torch._C._nn.linear( + x_40, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_40 = ( + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_42 = 
torch.nn.functional.dropout(x_41, 0.0, False, False) + x_41 = None + x_43 = x_36 + x_42 + x_36 = x_42 = None + x_44 = torch.rms_norm( + x_43, + (1536,), + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ = None + linear_15 = torch._C._nn.linear( + x_44, + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_44 = ( + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_6 = linear_15.reshape(1, 256, 3, 12, 128) + linear_15 = None + qkv_3 = reshape_6.permute(2, 0, 3, 1, 4) + reshape_6 = None + unbind_3 = qkv_3.unbind(0) + qkv_3 = None + q_3 = unbind_3[0] + k_3 = unbind_3[1] + v_3 = unbind_3[2] + unbind_3 = None + x_45 = torch._C._nn.scaled_dot_product_attention( + q_3, k_3, v_3, attn_mask=None, dropout_p=0.0 + ) + q_3 = k_3 = v_3 = None + transpose_4 = x_45.transpose(1, 2) + x_45 = None + x_46 = transpose_4.reshape(1, 256, 1536) + transpose_4 = None + x_47 = torch._C._nn.linear( + x_46, + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_46 = ( + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ + ) = None + x_48 = torch.nn.functional.dropout(x_47, 0.0, False, False) + x_47 = None + x_49 = x_43 + x_48 + x_43 = x_48 = None + x_50 = torch.rms_norm( + x_49, + (1536,), + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ = None + x_gate_3 = torch._C._nn.linear( + x_50, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_51 = torch._C._nn.linear( + x_50, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_50 = ( + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_3 = torch.nn.functional.silu(x_gate_3, inplace=False) + x_gate_3 = None + x_52 = silu_3 * x_51 + silu_3 = x_51 = None + x_53 = torch.nn.functional.dropout(x_52, 0.0, False, False) + x_52 = None + x_54 = torch._C._nn.linear( + x_53, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_53 = ( + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_55 = torch.nn.functional.dropout(x_54, 0.0, False, False) + x_54 = None + x_56 = x_49 + x_55 + x_49 = x_55 = None + x_57 = torch.rms_norm( + x_56, + (1536,), + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ = None + linear_20 = torch._C._nn.linear( + x_57, + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_57 = ( + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_8 = linear_20.reshape(1, 256, 3, 12, 128) + linear_20 = None + qkv_4 = reshape_8.permute(2, 0, 3, 1, 4) + reshape_8 = None + unbind_4 = qkv_4.unbind(0) + qkv_4 = None + q_4 = unbind_4[0] + k_4 = unbind_4[1] + v_4 = unbind_4[2] + unbind_4 = None + x_58 = torch._C._nn.scaled_dot_product_attention( + q_4, k_4, v_4, attn_mask=None, dropout_p=0.0 + ) + q_4 = k_4 = v_4 = None + transpose_5 = x_58.transpose(1, 2) + x_58 = None + x_59 = transpose_5.reshape(1, 256, 1536) + transpose_5 = None + x_60 = torch._C._nn.linear( 
+ x_59, + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_59 = ( + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ + ) = None + x_61 = torch.nn.functional.dropout(x_60, 0.0, False, False) + x_60 = None + x_62 = x_56 + x_61 + x_56 = x_61 = None + x_63 = torch.rms_norm( + x_62, + (1536,), + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ = None + x_gate_4 = torch._C._nn.linear( + x_63, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_64 = torch._C._nn.linear( + x_63, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_63 = ( + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_4 = torch.nn.functional.silu(x_gate_4, inplace=False) + x_gate_4 = None + x_65 = silu_4 * x_64 + silu_4 = x_64 = None + x_66 = torch.nn.functional.dropout(x_65, 0.0, False, False) + x_65 = None + x_67 = torch._C._nn.linear( + x_66, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_66 = ( + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_68 = torch.nn.functional.dropout(x_67, 0.0, False, False) + x_67 = None + x_69 = x_62 + x_68 + x_62 = x_68 = None + x_70 = torch.rms_norm( + x_69, + (1536,), + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ = None + linear_25 = torch._C._nn.linear( + x_70, + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_70 = ( + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_10 = linear_25.reshape(1, 256, 3, 12, 128) + linear_25 = None + qkv_5 = reshape_10.permute(2, 0, 3, 1, 4) + reshape_10 = None + unbind_5 = qkv_5.unbind(0) + qkv_5 = None + q_5 = unbind_5[0] + k_5 = unbind_5[1] + v_5 = unbind_5[2] + unbind_5 = None + x_71 = torch._C._nn.scaled_dot_product_attention( + q_5, k_5, v_5, attn_mask=None, dropout_p=0.0 + ) + q_5 = k_5 = v_5 = None + transpose_6 = x_71.transpose(1, 2) + x_71 = None + x_72 = transpose_6.reshape(1, 256, 1536) + transpose_6 = None + x_73 = torch._C._nn.linear( + x_72, + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_72 = ( + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ + ) = None + x_74 = torch.nn.functional.dropout(x_73, 0.0, False, False) + x_73 = None + x_75 = x_69 + x_74 + x_69 = x_74 = None + x_76 = torch.rms_norm( + x_75, + (1536,), + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ = None + x_gate_5 = torch._C._nn.linear( + x_76, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_77 = torch._C._nn.linear( + x_76, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_76 = ( + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_5 = torch.nn.functional.silu(x_gate_5, inplace=False) + x_gate_5 = None + x_78 = 
silu_5 * x_77 + silu_5 = x_77 = None + x_79 = torch.nn.functional.dropout(x_78, 0.0, False, False) + x_78 = None + x_80 = torch._C._nn.linear( + x_79, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_79 = ( + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_81 = torch.nn.functional.dropout(x_80, 0.0, False, False) + x_80 = None + x_82 = x_75 + x_81 + x_75 = x_81 = None + x_83 = torch.rms_norm( + x_82, + (1536,), + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ = None + linear_30 = torch._C._nn.linear( + x_83, + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_83 = ( + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_12 = linear_30.reshape(1, 256, 3, 12, 128) + linear_30 = None + qkv_6 = reshape_12.permute(2, 0, 3, 1, 4) + reshape_12 = None + unbind_6 = qkv_6.unbind(0) + qkv_6 = None + q_6 = unbind_6[0] + k_6 = unbind_6[1] + v_6 = unbind_6[2] + unbind_6 = None + x_84 = torch._C._nn.scaled_dot_product_attention( + q_6, k_6, v_6, attn_mask=None, dropout_p=0.0 + ) + q_6 = k_6 = v_6 = None + transpose_7 = x_84.transpose(1, 2) + x_84 = None + x_85 = transpose_7.reshape(1, 256, 1536) + transpose_7 = None + x_86 = torch._C._nn.linear( + x_85, + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_85 = ( + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ + ) = None + x_87 = torch.nn.functional.dropout(x_86, 0.0, False, False) + x_86 = None + x_88 = x_82 + x_87 + x_82 = x_87 = None + x_89 = torch.rms_norm( + x_88, + (1536,), + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ = None + x_gate_6 = torch._C._nn.linear( + x_89, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_90 = torch._C._nn.linear( + x_89, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_89 = ( + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_6 = torch.nn.functional.silu(x_gate_6, inplace=False) + x_gate_6 = None + x_91 = silu_6 * x_90 + silu_6 = x_90 = None + x_92 = torch.nn.functional.dropout(x_91, 0.0, False, False) + x_91 = None + x_93 = torch._C._nn.linear( + x_92, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_92 = ( + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_94 = torch.nn.functional.dropout(x_93, 0.0, False, False) + x_93 = None + x_95 = x_88 + x_94 + x_88 = x_94 = None + x_96 = torch.rms_norm( + x_95, + (1536,), + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ = None + linear_35 = torch._C._nn.linear( + x_96, + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_96 = ( + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_14 = linear_35.reshape(1, 256, 3, 12, 128) + linear_35 = None + qkv_7 = reshape_14.permute(2, 0, 3, 1, 4) + reshape_14 = None + unbind_7 = qkv_7.unbind(0) + qkv_7 = 
None + q_7 = unbind_7[0] + k_7 = unbind_7[1] + v_7 = unbind_7[2] + unbind_7 = None + x_97 = torch._C._nn.scaled_dot_product_attention( + q_7, k_7, v_7, attn_mask=None, dropout_p=0.0 + ) + q_7 = k_7 = v_7 = None + transpose_8 = x_97.transpose(1, 2) + x_97 = None + x_98 = transpose_8.reshape(1, 256, 1536) + transpose_8 = None + x_99 = torch._C._nn.linear( + x_98, + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_98 = ( + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ + ) = None + x_100 = torch.nn.functional.dropout(x_99, 0.0, False, False) + x_99 = None + x_101 = x_95 + x_100 + x_95 = x_100 = None + x_102 = torch.rms_norm( + x_101, + (1536,), + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ = None + x_gate_7 = torch._C._nn.linear( + x_102, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_103 = torch._C._nn.linear( + x_102, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_102 = ( + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_7 = torch.nn.functional.silu(x_gate_7, inplace=False) + x_gate_7 = None + x_104 = silu_7 * x_103 + silu_7 = x_103 = None + x_105 = torch.nn.functional.dropout(x_104, 0.0, False, False) + x_104 = None + x_106 = torch._C._nn.linear( + x_105, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_105 = ( + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_107 = torch.nn.functional.dropout(x_106, 0.0, False, False) + x_106 = None + x_108 = x_101 + x_107 + x_101 = x_107 = None + x_109 = torch.rms_norm( + x_108, + (1536,), + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ = None + linear_40 = torch._C._nn.linear( + x_109, + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_109 = ( + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_16 = linear_40.reshape(1, 256, 3, 12, 128) + linear_40 = None + qkv_8 = reshape_16.permute(2, 0, 3, 1, 4) + reshape_16 = None + unbind_8 = qkv_8.unbind(0) + qkv_8 = None + q_8 = unbind_8[0] + k_8 = unbind_8[1] + v_8 = unbind_8[2] + unbind_8 = None + x_110 = torch._C._nn.scaled_dot_product_attention( + q_8, k_8, v_8, attn_mask=None, dropout_p=0.0 + ) + q_8 = k_8 = v_8 = None + transpose_9 = x_110.transpose(1, 2) + x_110 = None + x_111 = transpose_9.reshape(1, 256, 1536) + transpose_9 = None + x_112 = torch._C._nn.linear( + x_111, + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_111 = ( + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ + ) = None + x_113 = torch.nn.functional.dropout(x_112, 0.0, False, False) + x_112 = None + x_114 = x_108 + x_113 + x_108 = x_113 = None + x_115 = torch.rms_norm( + x_114, + (1536,), + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ = None + x_gate_8 = torch._C._nn.linear( + x_115, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + 
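+ # The MLP is a SwiGLU-style gated unit: silu(fc1_g(x)) * fc1_x(x) -> fc2.
+ # Every dropout call in this trace is a no-op (p=0.0, training=False).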
l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_116 = torch._C._nn.linear( + x_115, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_115 = ( + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_8 = torch.nn.functional.silu(x_gate_8, inplace=False) + x_gate_8 = None + x_117 = silu_8 * x_116 + silu_8 = x_116 = None + x_118 = torch.nn.functional.dropout(x_117, 0.0, False, False) + x_117 = None + x_119 = torch._C._nn.linear( + x_118, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_118 = ( + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_120 = torch.nn.functional.dropout(x_119, 0.0, False, False) + x_119 = None + x_121 = x_114 + x_120 + x_114 = x_120 = None + x_122 = torch.rms_norm( + x_121, + (1536,), + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ = None + linear_45 = torch._C._nn.linear( + x_122, + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_122 = ( + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_18 = linear_45.reshape(1, 256, 3, 12, 128) + linear_45 = None + qkv_9 = reshape_18.permute(2, 0, 3, 1, 4) + reshape_18 = None + unbind_9 = qkv_9.unbind(0) + qkv_9 = None + q_9 = unbind_9[0] + k_9 = unbind_9[1] + v_9 = unbind_9[2] + unbind_9 = None + x_123 = torch._C._nn.scaled_dot_product_attention( + q_9, k_9, v_9, attn_mask=None, dropout_p=0.0 + ) + q_9 = k_9 = v_9 = None + transpose_10 = x_123.transpose(1, 2) + x_123 = None + x_124 = transpose_10.reshape(1, 256, 1536) + transpose_10 = None + x_125 = torch._C._nn.linear( + x_124, + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_124 = ( + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ + ) = None + x_126 = torch.nn.functional.dropout(x_125, 0.0, False, False) + x_125 = None + x_127 = x_121 + x_126 + x_121 = x_126 = None + x_128 = torch.rms_norm( + x_127, + (1536,), + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ = None + x_gate_9 = torch._C._nn.linear( + x_128, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_129 = torch._C._nn.linear( + x_128, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_128 = ( + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_9 = torch.nn.functional.silu(x_gate_9, inplace=False) + x_gate_9 = None + x_130 = silu_9 * x_129 + silu_9 = x_129 = None + x_131 = torch.nn.functional.dropout(x_130, 0.0, False, False) + x_130 = None + x_132 = torch._C._nn.linear( + x_131, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_131 = ( + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_133 = torch.nn.functional.dropout(x_132, 0.0, False, False) + x_132 = None + x_134 = x_127 + x_133 + x_127 = x_133 = None + x_135 = torch.rms_norm( + x_134, + (1536,), + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_, + 1e-05, + ) + 
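+ # Attention in every block: a fused QKV projection is reshaped to
+ # (batch=1, tokens=256, 3, heads=12, head_dim=128), permuted, and unbound
+ # into q/k/v of shape (1, 12, 256, 128); SDPA runs non-causal (attn_mask=None)
+ # and the output is reshaped back to (1, 256, 1536). Locals are assigned None
+ # right after their last use, apparently so intermediates can be freed early,
+ # mirroring eager-mode tensor lifetimes.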
l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ = None + linear_50 = torch._C._nn.linear( + x_135, + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_135 = ( + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_20 = linear_50.reshape(1, 256, 3, 12, 128) + linear_50 = None + qkv_10 = reshape_20.permute(2, 0, 3, 1, 4) + reshape_20 = None + unbind_10 = qkv_10.unbind(0) + qkv_10 = None + q_10 = unbind_10[0] + k_10 = unbind_10[1] + v_10 = unbind_10[2] + unbind_10 = None + x_136 = torch._C._nn.scaled_dot_product_attention( + q_10, k_10, v_10, attn_mask=None, dropout_p=0.0 + ) + q_10 = k_10 = v_10 = None + transpose_11 = x_136.transpose(1, 2) + x_136 = None + x_137 = transpose_11.reshape(1, 256, 1536) + transpose_11 = None + x_138 = torch._C._nn.linear( + x_137, + l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_137 = l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ = (None) + x_139 = torch.nn.functional.dropout(x_138, 0.0, False, False) + x_138 = None + x_140 = x_134 + x_139 + x_134 = x_139 = None + x_141 = torch.rms_norm( + x_140, + (1536,), + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ = None + x_gate_10 = torch._C._nn.linear( + x_141, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_142 = torch._C._nn.linear( + x_141, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_141 = l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_10 = torch.nn.functional.silu(x_gate_10, inplace=False) + x_gate_10 = None + x_143 = silu_10 * x_142 + silu_10 = x_142 = None + x_144 = torch.nn.functional.dropout(x_143, 0.0, False, False) + x_143 = None + x_145 = torch._C._nn.linear( + x_144, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_144 = ( + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_146 = torch.nn.functional.dropout(x_145, 0.0, False, False) + x_145 = None + x_147 = x_140 + x_146 + x_140 = x_146 = None + x_148 = torch.rms_norm( + x_147, + (1536,), + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ = None + linear_55 = torch._C._nn.linear( + x_148, + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_148 = ( + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_22 = linear_55.reshape(1, 256, 3, 12, 128) + linear_55 = None + qkv_11 = reshape_22.permute(2, 0, 3, 1, 4) + reshape_22 = None + unbind_11 = qkv_11.unbind(0) + qkv_11 = None + q_11 = unbind_11[0] + k_11 = unbind_11[1] + v_11 = unbind_11[2] + unbind_11 = None + x_149 = torch._C._nn.scaled_dot_product_attention( + q_11, k_11, v_11, attn_mask=None, dropout_p=0.0 + ) + q_11 = k_11 = v_11 = None + transpose_12 = x_149.transpose(1, 2) + x_149 = None + x_150 = transpose_12.reshape(1, 256, 1536) + transpose_12 = None + x_151 = torch._C._nn.linear( + x_150, + l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_150 = 
l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ = (None) + x_152 = torch.nn.functional.dropout(x_151, 0.0, False, False) + x_151 = None + x_153 = x_147 + x_152 + x_147 = x_152 = None + x_154 = torch.rms_norm( + x_153, + (1536,), + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ = None + x_gate_11 = torch._C._nn.linear( + x_154, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_155 = torch._C._nn.linear( + x_154, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_154 = l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_11 = torch.nn.functional.silu(x_gate_11, inplace=False) + x_gate_11 = None + x_156 = silu_11 * x_155 + silu_11 = x_155 = None + x_157 = torch.nn.functional.dropout(x_156, 0.0, False, False) + x_156 = None + x_158 = torch._C._nn.linear( + x_157, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_157 = ( + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_159 = torch.nn.functional.dropout(x_158, 0.0, False, False) + x_158 = None + x_160 = x_153 + x_159 + x_153 = x_159 = None + x_161 = torch.rms_norm( + x_160, + (1536,), + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ = None + linear_60 = torch._C._nn.linear( + x_161, + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_161 = ( + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_24 = linear_60.reshape(1, 256, 3, 12, 128) + linear_60 = None + qkv_12 = reshape_24.permute(2, 0, 3, 1, 4) + reshape_24 = None + unbind_12 = qkv_12.unbind(0) + qkv_12 = None + q_12 = unbind_12[0] + k_12 = unbind_12[1] + v_12 = unbind_12[2] + unbind_12 = None + x_162 = torch._C._nn.scaled_dot_product_attention( + q_12, k_12, v_12, attn_mask=None, dropout_p=0.0 + ) + q_12 = k_12 = v_12 = None + transpose_13 = x_162.transpose(1, 2) + x_162 = None + x_163 = transpose_13.reshape(1, 256, 1536) + transpose_13 = None + x_164 = torch._C._nn.linear( + x_163, + l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_163 = l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ = (None) + x_165 = torch.nn.functional.dropout(x_164, 0.0, False, False) + x_164 = None + x_166 = x_160 + x_165 + x_160 = x_165 = None + x_167 = torch.rms_norm( + x_166, + (1536,), + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ = None + x_gate_12 = torch._C._nn.linear( + x_167, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_168 = torch._C._nn.linear( + x_167, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_167 = l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_12 = torch.nn.functional.silu(x_gate_12, inplace=False) + x_gate_12 = None + x_169 = silu_12 * x_168 + silu_12 = 
x_168 = None + x_170 = torch.nn.functional.dropout(x_169, 0.0, False, False) + x_169 = None + x_171 = torch._C._nn.linear( + x_170, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_170 = ( + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_172 = torch.nn.functional.dropout(x_171, 0.0, False, False) + x_171 = None + x_173 = x_166 + x_172 + x_166 = x_172 = None + x_174 = torch.rms_norm( + x_173, + (1536,), + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ = None + linear_65 = torch._C._nn.linear( + x_174, + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_174 = ( + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_26 = linear_65.reshape(1, 256, 3, 12, 128) + linear_65 = None + qkv_13 = reshape_26.permute(2, 0, 3, 1, 4) + reshape_26 = None + unbind_13 = qkv_13.unbind(0) + qkv_13 = None + q_13 = unbind_13[0] + k_13 = unbind_13[1] + v_13 = unbind_13[2] + unbind_13 = None + x_175 = torch._C._nn.scaled_dot_product_attention( + q_13, k_13, v_13, attn_mask=None, dropout_p=0.0 + ) + q_13 = k_13 = v_13 = None + transpose_14 = x_175.transpose(1, 2) + x_175 = None + x_176 = transpose_14.reshape(1, 256, 1536) + transpose_14 = None + x_177 = torch._C._nn.linear( + x_176, + l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_176 = l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ = (None) + x_178 = torch.nn.functional.dropout(x_177, 0.0, False, False) + x_177 = None + x_179 = x_173 + x_178 + x_173 = x_178 = None + x_180 = torch.rms_norm( + x_179, + (1536,), + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ = None + x_gate_13 = torch._C._nn.linear( + x_180, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_181 = torch._C._nn.linear( + x_180, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_180 = l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_13 = torch.nn.functional.silu(x_gate_13, inplace=False) + x_gate_13 = None + x_182 = silu_13 * x_181 + silu_13 = x_181 = None + x_183 = torch.nn.functional.dropout(x_182, 0.0, False, False) + x_182 = None + x_184 = torch._C._nn.linear( + x_183, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_183 = ( + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_185 = torch.nn.functional.dropout(x_184, 0.0, False, False) + x_184 = None + x_186 = x_179 + x_185 + x_179 = x_185 = None + x_187 = torch.rms_norm( + x_186, + (1536,), + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ = None + linear_70 = torch._C._nn.linear( + x_187, + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_187 = ( + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_28 = linear_70.reshape(1, 256, 3, 12, 128) + linear_70 = None + qkv_14 = reshape_28.permute(2, 
0, 3, 1, 4) + reshape_28 = None + unbind_14 = qkv_14.unbind(0) + qkv_14 = None + q_14 = unbind_14[0] + k_14 = unbind_14[1] + v_14 = unbind_14[2] + unbind_14 = None + x_188 = torch._C._nn.scaled_dot_product_attention( + q_14, k_14, v_14, attn_mask=None, dropout_p=0.0 + ) + q_14 = k_14 = v_14 = None + transpose_15 = x_188.transpose(1, 2) + x_188 = None + x_189 = transpose_15.reshape(1, 256, 1536) + transpose_15 = None + x_190 = torch._C._nn.linear( + x_189, + l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_189 = l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ = (None) + x_191 = torch.nn.functional.dropout(x_190, 0.0, False, False) + x_190 = None + x_192 = x_186 + x_191 + x_186 = x_191 = None + x_193 = torch.rms_norm( + x_192, + (1536,), + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ = None + x_gate_14 = torch._C._nn.linear( + x_193, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_194 = torch._C._nn.linear( + x_193, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_193 = l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_14 = torch.nn.functional.silu(x_gate_14, inplace=False) + x_gate_14 = None + x_195 = silu_14 * x_194 + silu_14 = x_194 = None + x_196 = torch.nn.functional.dropout(x_195, 0.0, False, False) + x_195 = None + x_197 = torch._C._nn.linear( + x_196, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_196 = ( + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_198 = torch.nn.functional.dropout(x_197, 0.0, False, False) + x_197 = None + x_199 = x_192 + x_198 + x_192 = x_198 = None + x_200 = torch.rms_norm( + x_199, + (1536,), + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ = None + linear_75 = torch._C._nn.linear( + x_200, + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_200 = ( + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_30 = linear_75.reshape(1, 256, 3, 12, 128) + linear_75 = None + qkv_15 = reshape_30.permute(2, 0, 3, 1, 4) + reshape_30 = None + unbind_15 = qkv_15.unbind(0) + qkv_15 = None + q_15 = unbind_15[0] + k_15 = unbind_15[1] + v_15 = unbind_15[2] + unbind_15 = None + x_201 = torch._C._nn.scaled_dot_product_attention( + q_15, k_15, v_15, attn_mask=None, dropout_p=0.0 + ) + q_15 = k_15 = v_15 = None + transpose_16 = x_201.transpose(1, 2) + x_201 = None + x_202 = transpose_16.reshape(1, 256, 1536) + transpose_16 = None + x_203 = torch._C._nn.linear( + x_202, + l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_202 = l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ = (None) + x_204 = torch.nn.functional.dropout(x_203, 0.0, False, False) + x_203 = None + x_205 = x_199 + x_204 + x_199 = x_204 = None + x_206 = torch.rms_norm( + x_205, + (1536,), + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ = None + x_gate_15 = 
torch._C._nn.linear( + x_206, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_207 = torch._C._nn.linear( + x_206, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_206 = l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_15 = torch.nn.functional.silu(x_gate_15, inplace=False) + x_gate_15 = None + x_208 = silu_15 * x_207 + silu_15 = x_207 = None + x_209 = torch.nn.functional.dropout(x_208, 0.0, False, False) + x_208 = None + x_210 = torch._C._nn.linear( + x_209, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_209 = ( + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_211 = torch.nn.functional.dropout(x_210, 0.0, False, False) + x_210 = None + x_212 = x_205 + x_211 + x_205 = x_211 = None + x_213 = torch.rms_norm( + x_212, + (1536,), + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ = None + linear_80 = torch._C._nn.linear( + x_213, + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_213 = ( + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_32 = linear_80.reshape(1, 256, 3, 12, 128) + linear_80 = None + qkv_16 = reshape_32.permute(2, 0, 3, 1, 4) + reshape_32 = None + unbind_16 = qkv_16.unbind(0) + qkv_16 = None + q_16 = unbind_16[0] + k_16 = unbind_16[1] + v_16 = unbind_16[2] + unbind_16 = None + x_214 = torch._C._nn.scaled_dot_product_attention( + q_16, k_16, v_16, attn_mask=None, dropout_p=0.0 + ) + q_16 = k_16 = v_16 = None + transpose_17 = x_214.transpose(1, 2) + x_214 = None + x_215 = transpose_17.reshape(1, 256, 1536) + transpose_17 = None + x_216 = torch._C._nn.linear( + x_215, + l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_215 = l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ = (None) + x_217 = torch.nn.functional.dropout(x_216, 0.0, False, False) + x_216 = None + x_218 = x_212 + x_217 + x_212 = x_217 = None + x_219 = torch.rms_norm( + x_218, + (1536,), + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ = None + x_gate_16 = torch._C._nn.linear( + x_219, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_220 = torch._C._nn.linear( + x_219, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_219 = l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_16 = torch.nn.functional.silu(x_gate_16, inplace=False) + x_gate_16 = None + x_221 = silu_16 * x_220 + silu_16 = x_220 = None + x_222 = torch.nn.functional.dropout(x_221, 0.0, False, False) + x_221 = None + x_223 = torch._C._nn.linear( + x_222, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_222 = ( + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_224 = torch.nn.functional.dropout(x_223, 0.0, False, False) + x_223 = None + x_225 = x_218 + x_224 + 
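+ # A minimal, hypothetical sketch of the repeating pattern traced above and
+ # below (blocks 17-23 are identical); names are illustrative shorthand for
+ # the per-block weights, not the original timm module:
+ #
+ #   import torch
+ #   import torch.nn.functional as F
+ #
+ #   class Block(torch.nn.Module):  # dim=1536, heads=12, head_dim=128
+ #       def forward(self, x, w):  # w holds the seven per-block weights
+ #           h = torch.rms_norm(x, (1536,), w.norm1, 1e-05)
+ #           q, k, v = (F.linear(h, w.qkv)
+ #                      .reshape(1, 256, 3, 12, 128)
+ #                      .permute(2, 0, 3, 1, 4)
+ #                      .unbind(0))
+ #           a = F.scaled_dot_product_attention(q, k, v)
+ #           x = x + F.linear(a.transpose(1, 2).reshape(1, 256, 1536), w.proj)
+ #           h = torch.rms_norm(x, (1536,), w.norm2, 1e-05)
+ #           gated = F.silu(F.linear(h, w.fc1_g)) * F.linear(h, w.fc1_x)
+ #           return x + F.linear(gated, w.fc2)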
x_218 = x_224 = None + x_226 = torch.rms_norm( + x_225, + (1536,), + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ = None + linear_85 = torch._C._nn.linear( + x_226, + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_226 = ( + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_34 = linear_85.reshape(1, 256, 3, 12, 128) + linear_85 = None + qkv_17 = reshape_34.permute(2, 0, 3, 1, 4) + reshape_34 = None + unbind_17 = qkv_17.unbind(0) + qkv_17 = None + q_17 = unbind_17[0] + k_17 = unbind_17[1] + v_17 = unbind_17[2] + unbind_17 = None + x_227 = torch._C._nn.scaled_dot_product_attention( + q_17, k_17, v_17, attn_mask=None, dropout_p=0.0 + ) + q_17 = k_17 = v_17 = None + transpose_18 = x_227.transpose(1, 2) + x_227 = None + x_228 = transpose_18.reshape(1, 256, 1536) + transpose_18 = None + x_229 = torch._C._nn.linear( + x_228, + l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_228 = l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ = (None) + x_230 = torch.nn.functional.dropout(x_229, 0.0, False, False) + x_229 = None + x_231 = x_225 + x_230 + x_225 = x_230 = None + x_232 = torch.rms_norm( + x_231, + (1536,), + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ = None + x_gate_17 = torch._C._nn.linear( + x_232, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_233 = torch._C._nn.linear( + x_232, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_232 = l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_17 = torch.nn.functional.silu(x_gate_17, inplace=False) + x_gate_17 = None + x_234 = silu_17 * x_233 + silu_17 = x_233 = None + x_235 = torch.nn.functional.dropout(x_234, 0.0, False, False) + x_234 = None + x_236 = torch._C._nn.linear( + x_235, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_235 = ( + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_237 = torch.nn.functional.dropout(x_236, 0.0, False, False) + x_236 = None + x_238 = x_231 + x_237 + x_231 = x_237 = None + x_239 = torch.rms_norm( + x_238, + (1536,), + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ = None + linear_90 = torch._C._nn.linear( + x_239, + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_239 = ( + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_36 = linear_90.reshape(1, 256, 3, 12, 128) + linear_90 = None + qkv_18 = reshape_36.permute(2, 0, 3, 1, 4) + reshape_36 = None + unbind_18 = qkv_18.unbind(0) + qkv_18 = None + q_18 = unbind_18[0] + k_18 = unbind_18[1] + v_18 = unbind_18[2] + unbind_18 = None + x_240 = torch._C._nn.scaled_dot_product_attention( + q_18, k_18, v_18, attn_mask=None, dropout_p=0.0 + ) + q_18 = k_18 = v_18 = None + transpose_19 = x_240.transpose(1, 2) + x_240 = None + x_241 = transpose_19.reshape(1, 256, 1536) + transpose_19 = None + x_242 = 
torch._C._nn.linear( + x_241, + l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_241 = l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ = (None) + x_243 = torch.nn.functional.dropout(x_242, 0.0, False, False) + x_242 = None + x_244 = x_238 + x_243 + x_238 = x_243 = None + x_245 = torch.rms_norm( + x_244, + (1536,), + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ = None + x_gate_18 = torch._C._nn.linear( + x_245, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_246 = torch._C._nn.linear( + x_245, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_245 = l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_18 = torch.nn.functional.silu(x_gate_18, inplace=False) + x_gate_18 = None + x_247 = silu_18 * x_246 + silu_18 = x_246 = None + x_248 = torch.nn.functional.dropout(x_247, 0.0, False, False) + x_247 = None + x_249 = torch._C._nn.linear( + x_248, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_248 = ( + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_250 = torch.nn.functional.dropout(x_249, 0.0, False, False) + x_249 = None + x_251 = x_244 + x_250 + x_244 = x_250 = None + x_252 = torch.rms_norm( + x_251, + (1536,), + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ = None + linear_95 = torch._C._nn.linear( + x_252, + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_252 = ( + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_38 = linear_95.reshape(1, 256, 3, 12, 128) + linear_95 = None + qkv_19 = reshape_38.permute(2, 0, 3, 1, 4) + reshape_38 = None + unbind_19 = qkv_19.unbind(0) + qkv_19 = None + q_19 = unbind_19[0] + k_19 = unbind_19[1] + v_19 = unbind_19[2] + unbind_19 = None + x_253 = torch._C._nn.scaled_dot_product_attention( + q_19, k_19, v_19, attn_mask=None, dropout_p=0.0 + ) + q_19 = k_19 = v_19 = None + transpose_20 = x_253.transpose(1, 2) + x_253 = None + x_254 = transpose_20.reshape(1, 256, 1536) + transpose_20 = None + x_255 = torch._C._nn.linear( + x_254, + l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_254 = l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ = (None) + x_256 = torch.nn.functional.dropout(x_255, 0.0, False, False) + x_255 = None + x_257 = x_251 + x_256 + x_251 = x_256 = None + x_258 = torch.rms_norm( + x_257, + (1536,), + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ = None + x_gate_19 = torch._C._nn.linear( + x_258, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_259 = torch._C._nn.linear( + x_258, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_258 = 
l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_19 = torch.nn.functional.silu(x_gate_19, inplace=False) + x_gate_19 = None + x_260 = silu_19 * x_259 + silu_19 = x_259 = None + x_261 = torch.nn.functional.dropout(x_260, 0.0, False, False) + x_260 = None + x_262 = torch._C._nn.linear( + x_261, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_261 = ( + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_263 = torch.nn.functional.dropout(x_262, 0.0, False, False) + x_262 = None + x_264 = x_257 + x_263 + x_257 = x_263 = None + x_265 = torch.rms_norm( + x_264, + (1536,), + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ = None + linear_100 = torch._C._nn.linear( + x_265, + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_265 = ( + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_40 = linear_100.reshape(1, 256, 3, 12, 128) + linear_100 = None + qkv_20 = reshape_40.permute(2, 0, 3, 1, 4) + reshape_40 = None + unbind_20 = qkv_20.unbind(0) + qkv_20 = None + q_20 = unbind_20[0] + k_20 = unbind_20[1] + v_20 = unbind_20[2] + unbind_20 = None + x_266 = torch._C._nn.scaled_dot_product_attention( + q_20, k_20, v_20, attn_mask=None, dropout_p=0.0 + ) + q_20 = k_20 = v_20 = None + transpose_21 = x_266.transpose(1, 2) + x_266 = None + x_267 = transpose_21.reshape(1, 256, 1536) + transpose_21 = None + x_268 = torch._C._nn.linear( + x_267, + l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_267 = l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ = (None) + x_269 = torch.nn.functional.dropout(x_268, 0.0, False, False) + x_268 = None + x_270 = x_264 + x_269 + x_264 = x_269 = None + x_271 = torch.rms_norm( + x_270, + (1536,), + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ = None + x_gate_20 = torch._C._nn.linear( + x_271, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_272 = torch._C._nn.linear( + x_271, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_271 = l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_20 = torch.nn.functional.silu(x_gate_20, inplace=False) + x_gate_20 = None + x_273 = silu_20 * x_272 + silu_20 = x_272 = None + x_274 = torch.nn.functional.dropout(x_273, 0.0, False, False) + x_273 = None + x_275 = torch._C._nn.linear( + x_274, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_274 = ( + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_276 = torch.nn.functional.dropout(x_275, 0.0, False, False) + x_275 = None + x_277 = x_270 + x_276 + x_270 = x_276 = None + x_278 = torch.rms_norm( + x_277, + (1536,), + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ = None + linear_105 = torch._C._nn.linear( + x_278, + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_, + None, + 
) + x_278 = ( + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_42 = linear_105.reshape(1, 256, 3, 12, 128) + linear_105 = None + qkv_21 = reshape_42.permute(2, 0, 3, 1, 4) + reshape_42 = None + unbind_21 = qkv_21.unbind(0) + qkv_21 = None + q_21 = unbind_21[0] + k_21 = unbind_21[1] + v_21 = unbind_21[2] + unbind_21 = None + x_279 = torch._C._nn.scaled_dot_product_attention( + q_21, k_21, v_21, attn_mask=None, dropout_p=0.0 + ) + q_21 = k_21 = v_21 = None + transpose_22 = x_279.transpose(1, 2) + x_279 = None + x_280 = transpose_22.reshape(1, 256, 1536) + transpose_22 = None + x_281 = torch._C._nn.linear( + x_280, + l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_280 = l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ = (None) + x_282 = torch.nn.functional.dropout(x_281, 0.0, False, False) + x_281 = None + x_283 = x_277 + x_282 + x_277 = x_282 = None + x_284 = torch.rms_norm( + x_283, + (1536,), + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ = None + x_gate_21 = torch._C._nn.linear( + x_284, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_285 = torch._C._nn.linear( + x_284, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_284 = l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_21 = torch.nn.functional.silu(x_gate_21, inplace=False) + x_gate_21 = None + x_286 = silu_21 * x_285 + silu_21 = x_285 = None + x_287 = torch.nn.functional.dropout(x_286, 0.0, False, False) + x_286 = None + x_288 = torch._C._nn.linear( + x_287, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_287 = ( + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_289 = torch.nn.functional.dropout(x_288, 0.0, False, False) + x_288 = None + x_290 = x_283 + x_289 + x_283 = x_289 = None + x_291 = torch.rms_norm( + x_290, + (1536,), + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ = None + linear_110 = torch._C._nn.linear( + x_291, + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_291 = ( + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_44 = linear_110.reshape(1, 256, 3, 12, 128) + linear_110 = None + qkv_22 = reshape_44.permute(2, 0, 3, 1, 4) + reshape_44 = None + unbind_22 = qkv_22.unbind(0) + qkv_22 = None + q_22 = unbind_22[0] + k_22 = unbind_22[1] + v_22 = unbind_22[2] + unbind_22 = None + x_292 = torch._C._nn.scaled_dot_product_attention( + q_22, k_22, v_22, attn_mask=None, dropout_p=0.0 + ) + q_22 = k_22 = v_22 = None + transpose_23 = x_292.transpose(1, 2) + x_292 = None + x_293 = transpose_23.reshape(1, 256, 1536) + transpose_23 = None + x_294 = torch._C._nn.linear( + x_293, + l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_293 = l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ = (None) + x_295 = torch.nn.functional.dropout(x_294, 0.0, False, False) + x_294 = None + x_296 = x_290 + x_295 + x_290 = x_295 = None + x_297 
= torch.rms_norm( + x_296, + (1536,), + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ = None + x_gate_22 = torch._C._nn.linear( + x_297, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_298 = torch._C._nn.linear( + x_297, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_297 = l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_22 = torch.nn.functional.silu(x_gate_22, inplace=False) + x_gate_22 = None + x_299 = silu_22 * x_298 + silu_22 = x_298 = None + x_300 = torch.nn.functional.dropout(x_299, 0.0, False, False) + x_299 = None + x_301 = torch._C._nn.linear( + x_300, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_300 = ( + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_302 = torch.nn.functional.dropout(x_301, 0.0, False, False) + x_301 = None + x_303 = x_296 + x_302 + x_296 = x_302 = None + x_304 = torch.rms_norm( + x_303, + (1536,), + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ = None + linear_115 = torch._C._nn.linear( + x_304, + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_304 = ( + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_46 = linear_115.reshape(1, 256, 3, 12, 128) + linear_115 = None + qkv_23 = reshape_46.permute(2, 0, 3, 1, 4) + reshape_46 = None + unbind_23 = qkv_23.unbind(0) + qkv_23 = None + q_23 = unbind_23[0] + k_23 = unbind_23[1] + v_23 = unbind_23[2] + unbind_23 = None + x_305 = torch._C._nn.scaled_dot_product_attention( + q_23, k_23, v_23, attn_mask=None, dropout_p=0.0 + ) + q_23 = k_23 = v_23 = None + transpose_24 = x_305.transpose(1, 2) + x_305 = None + x_306 = transpose_24.reshape(1, 256, 1536) + transpose_24 = None + x_307 = torch._C._nn.linear( + x_306, + l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_306 = l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ = (None) + x_308 = torch.nn.functional.dropout(x_307, 0.0, False, False) + x_307 = None + x_309 = x_303 + x_308 + x_303 = x_308 = None + x_310 = torch.rms_norm( + x_309, + (1536,), + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ = None + x_gate_23 = torch._C._nn.linear( + x_310, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_311 = torch._C._nn.linear( + x_310, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_310 = l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_23 = torch.nn.functional.silu(x_gate_23, inplace=False) + x_gate_23 = None + x_312 = silu_23 * x_311 + silu_23 = x_311 = None + x_313 = torch.nn.functional.dropout(x_312, 0.0, False, False) + x_312 = None + x_314 = torch._C._nn.linear( + x_313, + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_, + 
None, + ) + x_313 = ( + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_315 = torch.nn.functional.dropout(x_314, 0.0, False, False) + x_314 = None + x_316 = x_309 + x_315 + x_309 = x_315 = None + x_317 = torch.rms_norm( + x_316, (1536,), l_self_modules_norm_parameters_weight_, 1e-05 + ) + x_316 = l_self_modules_norm_parameters_weight_ = None + x_318 = x_317[ + (slice(None, None, None), slice(l_self_num_prefix_tokens, None, None)) + ] + x_317 = l_self_num_prefix_tokens = None + x_319 = x_318.mean(dim=1) + x_318 = None + x_320 = torch.nn.functional.dropout(x_319, 0.0, False, False) + x_319 = None + return (x_320,) diff --git a/samples/timm/aimv2_huge_patch14_224.apple_pt/weight_meta.py b/samples/timm/aimv2_huge_patch14_224.apple_pt/weight_meta.py new file mode 100644 index 000000000..e473ff405 --- /dev/null +++ b/samples/timm/aimv2_huge_patch14_224.apple_pt/weight_meta.py @@ -0,0 +1,1920 @@ +class Program_weight_tensor_meta_L_x_: + name = "L_x_" + shape = [1, 3, 224, 224] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.232 + std = 1.288 + data = None + + +class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_weight_: + name = "L_self_modules_patch_embed_modules_proj_parameters_weight_" + shape = [1536, 3, 14, 14] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.024 + data = None + + +class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_bias_: + name = "L_self_modules_patch_embed_modules_proj_parameters_bias_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.024 + data = None + + +class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_norm_parameters_weight_: + name = "L_self_modules_patch_embed_modules_norm_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_parameters_pos_embed_: + name = "L_self_parameters_pos_embed_" + shape = [1, 256, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = 
"cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_: + name = 
"L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + 
mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_" + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 
4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_: 
+ name = ( + "L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_" 
+ ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + 
data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [4608, 
1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1536, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [4096, 1536] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_" + shape = [1536, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_norm_parameters_weight_: + name = "L_self_modules_norm_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_num_prefix_tokens: + name = "L_self_num_prefix_tokens" + shape = [] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + data = [4] diff --git a/samples/timm/aimv2_large_patch14_224.apple_pt/graph_hash.txt b/samples/timm/aimv2_large_patch14_224.apple_pt/graph_hash.txt new file mode 100644 index 000000000..fec1e0b25 --- /dev/null +++ b/samples/timm/aimv2_large_patch14_224.apple_pt/graph_hash.txt @@ -0,0 +1 @@ +b3d1b0a0f52857e51f4dddd46b99a86dfb3d43e77e14d5859924120583f5a244 \ No newline at end of file diff --git a/samples/timm/aimv2_large_patch14_224.apple_pt/graph_net.json b/samples/timm/aimv2_large_patch14_224.apple_pt/graph_net.json new file mode 100644 index 000000000..1373fe3b5 --- /dev/null +++ b/samples/timm/aimv2_large_patch14_224.apple_pt/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/samples/timm/aimv2_large_patch14_224.apple_pt/input_meta.py b/samples/timm/aimv2_large_patch14_224.apple_pt/input_meta.py new file mode 100644 index 000000000..e69de29bb diff --git a/samples/timm/aimv2_large_patch14_224.apple_pt/input_tensor_constraints.py b/samples/timm/aimv2_large_patch14_224.apple_pt/input_tensor_constraints.py new file mode 100644 index 000000000..e69de29bb diff --git a/samples/timm/aimv2_large_patch14_224.apple_pt/model.py b/samples/timm/aimv2_large_patch14_224.apple_pt/model.py new file mode 100644 index 000000000..1f2962d42 --- /dev/null +++ b/samples/timm/aimv2_large_patch14_224.apple_pt/model.py @@ -0,0 +1,2663 @@ +import torch + + 
+class GraphModule(torch.nn.Module): + def forward( + self, + L_x_: torch.Tensor, + L_self_modules_patch_embed_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_patch_embed_modules_proj_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_patch_embed_modules_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_parameters_pos_embed_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + 
L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_num_prefix_tokens: torch.SymInt, + ): + l_x_ = L_x_ + l_self_modules_patch_embed_modules_proj_parameters_weight_ = ( + L_self_modules_patch_embed_modules_proj_parameters_weight_ + ) + l_self_modules_patch_embed_modules_proj_parameters_bias_ = ( + L_self_modules_patch_embed_modules_proj_parameters_bias_ + ) + l_self_modules_patch_embed_modules_norm_parameters_weight_ = ( + L_self_modules_patch_embed_modules_norm_parameters_weight_ + ) + l_self_parameters_pos_embed_ = L_self_parameters_pos_embed_ + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ + ) + 
l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ = ( + 
L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ = ( + 
L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ + ) + 
l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ = 
L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ + ) + 
l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ = ( + 
L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ = L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ + ) + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ = L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ + l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ = L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_ + l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ = ( + L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_ + ) + l_self_modules_norm_parameters_weight_ = L_self_modules_norm_parameters_weight_ + l_self_num_prefix_tokens = L_self_num_prefix_tokens + x = torch.conv2d( + l_x_, + l_self_modules_patch_embed_modules_proj_parameters_weight_, + l_self_modules_patch_embed_modules_proj_parameters_bias_, + (14, 14), + (0, 0), + (1, 1), + 1, + ) + l_x_ = ( + l_self_modules_patch_embed_modules_proj_parameters_weight_ + ) = l_self_modules_patch_embed_modules_proj_parameters_bias_ = None + flatten = 
x.flatten(2) + x = None + x_1 = flatten.transpose(1, 2) + flatten = None + x_2 = torch.rms_norm( + x_1, + (1024,), + l_self_modules_patch_embed_modules_norm_parameters_weight_, + 1e-05, + ) + x_1 = l_self_modules_patch_embed_modules_norm_parameters_weight_ = None + x_3 = x_2 + l_self_parameters_pos_embed_ + x_2 = l_self_parameters_pos_embed_ = None + x_4 = torch.nn.functional.dropout(x_3, 0.0, False, False) + x_3 = None + x_5 = torch.rms_norm( + x_4, + (1024,), + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_0_modules_norm1_parameters_weight_ = None + linear = torch._C._nn.linear( + x_5, + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_5 = ( + l_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape = linear.reshape(1, 256, 3, 8, 128) + linear = None + qkv = reshape.permute(2, 0, 3, 1, 4) + reshape = None + unbind = qkv.unbind(0) + qkv = None + q = unbind[0] + k = unbind[1] + v = unbind[2] + unbind = None + x_6 = torch._C._nn.scaled_dot_product_attention( + q, k, v, attn_mask=None, dropout_p=0.0 + ) + q = k = v = None + transpose_1 = x_6.transpose(1, 2) + x_6 = None + x_7 = transpose_1.reshape(1, 256, 1024) + transpose_1 = None + x_8 = torch._C._nn.linear( + x_7, + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_7 = ( + l_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_ + ) = None + x_9 = torch.nn.functional.dropout(x_8, 0.0, False, False) + x_8 = None + x_10 = x_4 + x_9 + x_4 = x_9 = None + x_11 = torch.rms_norm( + x_10, + (1024,), + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_0_modules_norm2_parameters_weight_ = None + x_gate = torch._C._nn.linear( + x_11, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_12 = torch._C._nn.linear( + x_11, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_11 = ( + l_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu = torch.nn.functional.silu(x_gate, inplace=False) + x_gate = None + x_13 = silu * x_12 + silu = x_12 = None + x_14 = torch.nn.functional.dropout(x_13, 0.0, False, False) + x_13 = None + x_15 = torch._C._nn.linear( + x_14, + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_14 = ( + l_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_16 = torch.nn.functional.dropout(x_15, 0.0, False, False) + x_15 = None + x_17 = x_10 + x_16 + x_10 = x_16 = None + x_18 = torch.rms_norm( + x_17, + (1024,), + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_1_modules_norm1_parameters_weight_ = None + linear_5 = torch._C._nn.linear( + x_18, + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_18 = ( + l_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_2 = linear_5.reshape(1, 256, 3, 8, 128) + linear_5 = None + qkv_1 = reshape_2.permute(2, 0, 3, 1, 4) + reshape_2 = None + unbind_1 = qkv_1.unbind(0) + qkv_1 = None + q_1 = unbind_1[0] + k_1 = unbind_1[1] + v_1 = unbind_1[2] + unbind_1 = None + x_19 = 
torch._C._nn.scaled_dot_product_attention( + q_1, k_1, v_1, attn_mask=None, dropout_p=0.0 + ) + q_1 = k_1 = v_1 = None + transpose_2 = x_19.transpose(1, 2) + x_19 = None + x_20 = transpose_2.reshape(1, 256, 1024) + transpose_2 = None + x_21 = torch._C._nn.linear( + x_20, + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_20 = ( + l_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_ + ) = None + x_22 = torch.nn.functional.dropout(x_21, 0.0, False, False) + x_21 = None + x_23 = x_17 + x_22 + x_17 = x_22 = None + x_24 = torch.rms_norm( + x_23, + (1024,), + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_1_modules_norm2_parameters_weight_ = None + x_gate_1 = torch._C._nn.linear( + x_24, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_25 = torch._C._nn.linear( + x_24, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_24 = ( + l_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_1 = torch.nn.functional.silu(x_gate_1, inplace=False) + x_gate_1 = None + x_26 = silu_1 * x_25 + silu_1 = x_25 = None + x_27 = torch.nn.functional.dropout(x_26, 0.0, False, False) + x_26 = None + x_28 = torch._C._nn.linear( + x_27, + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_27 = ( + l_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_29 = torch.nn.functional.dropout(x_28, 0.0, False, False) + x_28 = None + x_30 = x_23 + x_29 + x_23 = x_29 = None + x_31 = torch.rms_norm( + x_30, + (1024,), + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_2_modules_norm1_parameters_weight_ = None + linear_10 = torch._C._nn.linear( + x_31, + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_31 = ( + l_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_4 = linear_10.reshape(1, 256, 3, 8, 128) + linear_10 = None + qkv_2 = reshape_4.permute(2, 0, 3, 1, 4) + reshape_4 = None + unbind_2 = qkv_2.unbind(0) + qkv_2 = None + q_2 = unbind_2[0] + k_2 = unbind_2[1] + v_2 = unbind_2[2] + unbind_2 = None + x_32 = torch._C._nn.scaled_dot_product_attention( + q_2, k_2, v_2, attn_mask=None, dropout_p=0.0 + ) + q_2 = k_2 = v_2 = None + transpose_3 = x_32.transpose(1, 2) + x_32 = None + x_33 = transpose_3.reshape(1, 256, 1024) + transpose_3 = None + x_34 = torch._C._nn.linear( + x_33, + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_33 = ( + l_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_ + ) = None + x_35 = torch.nn.functional.dropout(x_34, 0.0, False, False) + x_34 = None + x_36 = x_30 + x_35 + x_30 = x_35 = None + x_37 = torch.rms_norm( + x_36, + (1024,), + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_2_modules_norm2_parameters_weight_ = None + x_gate_2 = torch._C._nn.linear( + x_37, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_38 = torch._C._nn.linear( + x_37, + 
l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_37 = ( + l_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_2 = torch.nn.functional.silu(x_gate_2, inplace=False) + x_gate_2 = None + x_39 = silu_2 * x_38 + silu_2 = x_38 = None + x_40 = torch.nn.functional.dropout(x_39, 0.0, False, False) + x_39 = None + x_41 = torch._C._nn.linear( + x_40, + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_40 = ( + l_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_42 = torch.nn.functional.dropout(x_41, 0.0, False, False) + x_41 = None + x_43 = x_36 + x_42 + x_36 = x_42 = None + x_44 = torch.rms_norm( + x_43, + (1024,), + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_3_modules_norm1_parameters_weight_ = None + linear_15 = torch._C._nn.linear( + x_44, + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_44 = ( + l_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_6 = linear_15.reshape(1, 256, 3, 8, 128) + linear_15 = None + qkv_3 = reshape_6.permute(2, 0, 3, 1, 4) + reshape_6 = None + unbind_3 = qkv_3.unbind(0) + qkv_3 = None + q_3 = unbind_3[0] + k_3 = unbind_3[1] + v_3 = unbind_3[2] + unbind_3 = None + x_45 = torch._C._nn.scaled_dot_product_attention( + q_3, k_3, v_3, attn_mask=None, dropout_p=0.0 + ) + q_3 = k_3 = v_3 = None + transpose_4 = x_45.transpose(1, 2) + x_45 = None + x_46 = transpose_4.reshape(1, 256, 1024) + transpose_4 = None + x_47 = torch._C._nn.linear( + x_46, + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_46 = ( + l_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_ + ) = None + x_48 = torch.nn.functional.dropout(x_47, 0.0, False, False) + x_47 = None + x_49 = x_43 + x_48 + x_43 = x_48 = None + x_50 = torch.rms_norm( + x_49, + (1024,), + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_3_modules_norm2_parameters_weight_ = None + x_gate_3 = torch._C._nn.linear( + x_50, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_51 = torch._C._nn.linear( + x_50, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_50 = ( + l_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_3 = torch.nn.functional.silu(x_gate_3, inplace=False) + x_gate_3 = None + x_52 = silu_3 * x_51 + silu_3 = x_51 = None + x_53 = torch.nn.functional.dropout(x_52, 0.0, False, False) + x_52 = None + x_54 = torch._C._nn.linear( + x_53, + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_53 = ( + l_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_55 = torch.nn.functional.dropout(x_54, 0.0, False, False) + x_54 = None + x_56 = x_49 + x_55 + x_49 = x_55 = None + x_57 = torch.rms_norm( + x_56, + (1024,), + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_4_modules_norm1_parameters_weight_ = None + linear_20 = torch._C._nn.linear( + x_57, + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_, + None, + ) + 
x_57 = ( + l_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_8 = linear_20.reshape(1, 256, 3, 8, 128) + linear_20 = None + qkv_4 = reshape_8.permute(2, 0, 3, 1, 4) + reshape_8 = None + unbind_4 = qkv_4.unbind(0) + qkv_4 = None + q_4 = unbind_4[0] + k_4 = unbind_4[1] + v_4 = unbind_4[2] + unbind_4 = None + x_58 = torch._C._nn.scaled_dot_product_attention( + q_4, k_4, v_4, attn_mask=None, dropout_p=0.0 + ) + q_4 = k_4 = v_4 = None + transpose_5 = x_58.transpose(1, 2) + x_58 = None + x_59 = transpose_5.reshape(1, 256, 1024) + transpose_5 = None + x_60 = torch._C._nn.linear( + x_59, + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_59 = ( + l_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_ + ) = None + x_61 = torch.nn.functional.dropout(x_60, 0.0, False, False) + x_60 = None + x_62 = x_56 + x_61 + x_56 = x_61 = None + x_63 = torch.rms_norm( + x_62, + (1024,), + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_4_modules_norm2_parameters_weight_ = None + x_gate_4 = torch._C._nn.linear( + x_63, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_64 = torch._C._nn.linear( + x_63, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_63 = ( + l_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_4 = torch.nn.functional.silu(x_gate_4, inplace=False) + x_gate_4 = None + x_65 = silu_4 * x_64 + silu_4 = x_64 = None + x_66 = torch.nn.functional.dropout(x_65, 0.0, False, False) + x_65 = None + x_67 = torch._C._nn.linear( + x_66, + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_66 = ( + l_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_68 = torch.nn.functional.dropout(x_67, 0.0, False, False) + x_67 = None + x_69 = x_62 + x_68 + x_62 = x_68 = None + x_70 = torch.rms_norm( + x_69, + (1024,), + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_5_modules_norm1_parameters_weight_ = None + linear_25 = torch._C._nn.linear( + x_70, + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_70 = ( + l_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_10 = linear_25.reshape(1, 256, 3, 8, 128) + linear_25 = None + qkv_5 = reshape_10.permute(2, 0, 3, 1, 4) + reshape_10 = None + unbind_5 = qkv_5.unbind(0) + qkv_5 = None + q_5 = unbind_5[0] + k_5 = unbind_5[1] + v_5 = unbind_5[2] + unbind_5 = None + x_71 = torch._C._nn.scaled_dot_product_attention( + q_5, k_5, v_5, attn_mask=None, dropout_p=0.0 + ) + q_5 = k_5 = v_5 = None + transpose_6 = x_71.transpose(1, 2) + x_71 = None + x_72 = transpose_6.reshape(1, 256, 1024) + transpose_6 = None + x_73 = torch._C._nn.linear( + x_72, + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_72 = ( + l_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_ + ) = None + x_74 = torch.nn.functional.dropout(x_73, 0.0, False, False) + x_73 = None + x_75 = x_69 + x_74 + x_69 = x_74 = None + x_76 = torch.rms_norm( + x_75, + (1024,), + l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_, + 1e-05, + ) 
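+        # The 24 blocks of this traced forward all repeat the pre-norm pattern
+        # sketched here (pseudocode for orientation only; names are descriptive,
+        # not from the trace):
+        #     h = rms_norm(x); q, k, v = unbind(qkv_linear(h))    # 8 heads x 128
+        #     x = x + proj(scaled_dot_product_attention(q, k, v))
+        #     h = rms_norm(x)
+        #     x = x + fc2(silu(fc1_g(h)) * fc1_x(h))              # SwiGLU MLP
+        # Sequence length is 256 patch tokens ((224 / 14) ** 2) at width 1024;
+        # every dropout call is a p=0.0, training=False no-op kept by the trace.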
+ l_self_modules_blocks_modules_5_modules_norm2_parameters_weight_ = None + x_gate_5 = torch._C._nn.linear( + x_76, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_77 = torch._C._nn.linear( + x_76, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_76 = ( + l_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_5 = torch.nn.functional.silu(x_gate_5, inplace=False) + x_gate_5 = None + x_78 = silu_5 * x_77 + silu_5 = x_77 = None + x_79 = torch.nn.functional.dropout(x_78, 0.0, False, False) + x_78 = None + x_80 = torch._C._nn.linear( + x_79, + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_79 = ( + l_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_81 = torch.nn.functional.dropout(x_80, 0.0, False, False) + x_80 = None + x_82 = x_75 + x_81 + x_75 = x_81 = None + x_83 = torch.rms_norm( + x_82, + (1024,), + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_6_modules_norm1_parameters_weight_ = None + linear_30 = torch._C._nn.linear( + x_83, + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_83 = ( + l_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_12 = linear_30.reshape(1, 256, 3, 8, 128) + linear_30 = None + qkv_6 = reshape_12.permute(2, 0, 3, 1, 4) + reshape_12 = None + unbind_6 = qkv_6.unbind(0) + qkv_6 = None + q_6 = unbind_6[0] + k_6 = unbind_6[1] + v_6 = unbind_6[2] + unbind_6 = None + x_84 = torch._C._nn.scaled_dot_product_attention( + q_6, k_6, v_6, attn_mask=None, dropout_p=0.0 + ) + q_6 = k_6 = v_6 = None + transpose_7 = x_84.transpose(1, 2) + x_84 = None + x_85 = transpose_7.reshape(1, 256, 1024) + transpose_7 = None + x_86 = torch._C._nn.linear( + x_85, + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_85 = ( + l_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_ + ) = None + x_87 = torch.nn.functional.dropout(x_86, 0.0, False, False) + x_86 = None + x_88 = x_82 + x_87 + x_82 = x_87 = None + x_89 = torch.rms_norm( + x_88, + (1024,), + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_6_modules_norm2_parameters_weight_ = None + x_gate_6 = torch._C._nn.linear( + x_89, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_90 = torch._C._nn.linear( + x_89, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_89 = ( + l_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_6 = torch.nn.functional.silu(x_gate_6, inplace=False) + x_gate_6 = None + x_91 = silu_6 * x_90 + silu_6 = x_90 = None + x_92 = torch.nn.functional.dropout(x_91, 0.0, False, False) + x_91 = None + x_93 = torch._C._nn.linear( + x_92, + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_92 = ( + l_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_94 = torch.nn.functional.dropout(x_93, 0.0, False, False) + x_93 = None + x_95 = x_88 + x_94 + x_88 = 
x_94 = None + x_96 = torch.rms_norm( + x_95, + (1024,), + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_7_modules_norm1_parameters_weight_ = None + linear_35 = torch._C._nn.linear( + x_96, + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_96 = ( + l_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_14 = linear_35.reshape(1, 256, 3, 8, 128) + linear_35 = None + qkv_7 = reshape_14.permute(2, 0, 3, 1, 4) + reshape_14 = None + unbind_7 = qkv_7.unbind(0) + qkv_7 = None + q_7 = unbind_7[0] + k_7 = unbind_7[1] + v_7 = unbind_7[2] + unbind_7 = None + x_97 = torch._C._nn.scaled_dot_product_attention( + q_7, k_7, v_7, attn_mask=None, dropout_p=0.0 + ) + q_7 = k_7 = v_7 = None + transpose_8 = x_97.transpose(1, 2) + x_97 = None + x_98 = transpose_8.reshape(1, 256, 1024) + transpose_8 = None + x_99 = torch._C._nn.linear( + x_98, + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_98 = ( + l_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_ + ) = None + x_100 = torch.nn.functional.dropout(x_99, 0.0, False, False) + x_99 = None + x_101 = x_95 + x_100 + x_95 = x_100 = None + x_102 = torch.rms_norm( + x_101, + (1024,), + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_7_modules_norm2_parameters_weight_ = None + x_gate_7 = torch._C._nn.linear( + x_102, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_103 = torch._C._nn.linear( + x_102, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_102 = ( + l_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_7 = torch.nn.functional.silu(x_gate_7, inplace=False) + x_gate_7 = None + x_104 = silu_7 * x_103 + silu_7 = x_103 = None + x_105 = torch.nn.functional.dropout(x_104, 0.0, False, False) + x_104 = None + x_106 = torch._C._nn.linear( + x_105, + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_105 = ( + l_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_107 = torch.nn.functional.dropout(x_106, 0.0, False, False) + x_106 = None + x_108 = x_101 + x_107 + x_101 = x_107 = None + x_109 = torch.rms_norm( + x_108, + (1024,), + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_8_modules_norm1_parameters_weight_ = None + linear_40 = torch._C._nn.linear( + x_109, + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_109 = ( + l_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_16 = linear_40.reshape(1, 256, 3, 8, 128) + linear_40 = None + qkv_8 = reshape_16.permute(2, 0, 3, 1, 4) + reshape_16 = None + unbind_8 = qkv_8.unbind(0) + qkv_8 = None + q_8 = unbind_8[0] + k_8 = unbind_8[1] + v_8 = unbind_8[2] + unbind_8 = None + x_110 = torch._C._nn.scaled_dot_product_attention( + q_8, k_8, v_8, attn_mask=None, dropout_p=0.0 + ) + q_8 = k_8 = v_8 = None + transpose_9 = x_110.transpose(1, 2) + x_110 = None + x_111 = transpose_9.reshape(1, 256, 1024) + transpose_9 = None + x_112 = torch._C._nn.linear( + x_111, + 
l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_111 = ( + l_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_ + ) = None + x_113 = torch.nn.functional.dropout(x_112, 0.0, False, False) + x_112 = None + x_114 = x_108 + x_113 + x_108 = x_113 = None + x_115 = torch.rms_norm( + x_114, + (1024,), + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_8_modules_norm2_parameters_weight_ = None + x_gate_8 = torch._C._nn.linear( + x_115, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_116 = torch._C._nn.linear( + x_115, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_115 = ( + l_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_8 = torch.nn.functional.silu(x_gate_8, inplace=False) + x_gate_8 = None + x_117 = silu_8 * x_116 + silu_8 = x_116 = None + x_118 = torch.nn.functional.dropout(x_117, 0.0, False, False) + x_117 = None + x_119 = torch._C._nn.linear( + x_118, + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_118 = ( + l_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_120 = torch.nn.functional.dropout(x_119, 0.0, False, False) + x_119 = None + x_121 = x_114 + x_120 + x_114 = x_120 = None + x_122 = torch.rms_norm( + x_121, + (1024,), + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_9_modules_norm1_parameters_weight_ = None + linear_45 = torch._C._nn.linear( + x_122, + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_122 = ( + l_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_18 = linear_45.reshape(1, 256, 3, 8, 128) + linear_45 = None + qkv_9 = reshape_18.permute(2, 0, 3, 1, 4) + reshape_18 = None + unbind_9 = qkv_9.unbind(0) + qkv_9 = None + q_9 = unbind_9[0] + k_9 = unbind_9[1] + v_9 = unbind_9[2] + unbind_9 = None + x_123 = torch._C._nn.scaled_dot_product_attention( + q_9, k_9, v_9, attn_mask=None, dropout_p=0.0 + ) + q_9 = k_9 = v_9 = None + transpose_10 = x_123.transpose(1, 2) + x_123 = None + x_124 = transpose_10.reshape(1, 256, 1024) + transpose_10 = None + x_125 = torch._C._nn.linear( + x_124, + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_124 = ( + l_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_ + ) = None + x_126 = torch.nn.functional.dropout(x_125, 0.0, False, False) + x_125 = None + x_127 = x_121 + x_126 + x_121 = x_126 = None + x_128 = torch.rms_norm( + x_127, + (1024,), + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_9_modules_norm2_parameters_weight_ = None + x_gate_9 = torch._C._nn.linear( + x_128, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_129 = torch._C._nn.linear( + x_128, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_128 = ( + l_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_ + ) = None + silu_9 = 
torch.nn.functional.silu(x_gate_9, inplace=False) + x_gate_9 = None + x_130 = silu_9 * x_129 + silu_9 = x_129 = None + x_131 = torch.nn.functional.dropout(x_130, 0.0, False, False) + x_130 = None + x_132 = torch._C._nn.linear( + x_131, + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_131 = ( + l_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_133 = torch.nn.functional.dropout(x_132, 0.0, False, False) + x_132 = None + x_134 = x_127 + x_133 + x_127 = x_133 = None + x_135 = torch.rms_norm( + x_134, + (1024,), + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_10_modules_norm1_parameters_weight_ = None + linear_50 = torch._C._nn.linear( + x_135, + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_135 = ( + l_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_20 = linear_50.reshape(1, 256, 3, 8, 128) + linear_50 = None + qkv_10 = reshape_20.permute(2, 0, 3, 1, 4) + reshape_20 = None + unbind_10 = qkv_10.unbind(0) + qkv_10 = None + q_10 = unbind_10[0] + k_10 = unbind_10[1] + v_10 = unbind_10[2] + unbind_10 = None + x_136 = torch._C._nn.scaled_dot_product_attention( + q_10, k_10, v_10, attn_mask=None, dropout_p=0.0 + ) + q_10 = k_10 = v_10 = None + transpose_11 = x_136.transpose(1, 2) + x_136 = None + x_137 = transpose_11.reshape(1, 256, 1024) + transpose_11 = None + x_138 = torch._C._nn.linear( + x_137, + l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_137 = l_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_ = (None) + x_139 = torch.nn.functional.dropout(x_138, 0.0, False, False) + x_138 = None + x_140 = x_134 + x_139 + x_134 = x_139 = None + x_141 = torch.rms_norm( + x_140, + (1024,), + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_10_modules_norm2_parameters_weight_ = None + x_gate_10 = torch._C._nn.linear( + x_141, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_142 = torch._C._nn.linear( + x_141, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_141 = l_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_10 = torch.nn.functional.silu(x_gate_10, inplace=False) + x_gate_10 = None + x_143 = silu_10 * x_142 + silu_10 = x_142 = None + x_144 = torch.nn.functional.dropout(x_143, 0.0, False, False) + x_143 = None + x_145 = torch._C._nn.linear( + x_144, + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_144 = ( + l_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_146 = torch.nn.functional.dropout(x_145, 0.0, False, False) + x_145 = None + x_147 = x_140 + x_146 + x_140 = x_146 = None + x_148 = torch.rms_norm( + x_147, + (1024,), + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_11_modules_norm1_parameters_weight_ = None + linear_55 = torch._C._nn.linear( + x_148, + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_148 = ( + l_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_ + ) = None + 
reshape_22 = linear_55.reshape(1, 256, 3, 8, 128) + linear_55 = None + qkv_11 = reshape_22.permute(2, 0, 3, 1, 4) + reshape_22 = None + unbind_11 = qkv_11.unbind(0) + qkv_11 = None + q_11 = unbind_11[0] + k_11 = unbind_11[1] + v_11 = unbind_11[2] + unbind_11 = None + x_149 = torch._C._nn.scaled_dot_product_attention( + q_11, k_11, v_11, attn_mask=None, dropout_p=0.0 + ) + q_11 = k_11 = v_11 = None + transpose_12 = x_149.transpose(1, 2) + x_149 = None + x_150 = transpose_12.reshape(1, 256, 1024) + transpose_12 = None + x_151 = torch._C._nn.linear( + x_150, + l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_150 = l_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_ = (None) + x_152 = torch.nn.functional.dropout(x_151, 0.0, False, False) + x_151 = None + x_153 = x_147 + x_152 + x_147 = x_152 = None + x_154 = torch.rms_norm( + x_153, + (1024,), + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_11_modules_norm2_parameters_weight_ = None + x_gate_11 = torch._C._nn.linear( + x_154, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_155 = torch._C._nn.linear( + x_154, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_154 = l_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_11 = torch.nn.functional.silu(x_gate_11, inplace=False) + x_gate_11 = None + x_156 = silu_11 * x_155 + silu_11 = x_155 = None + x_157 = torch.nn.functional.dropout(x_156, 0.0, False, False) + x_156 = None + x_158 = torch._C._nn.linear( + x_157, + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_157 = ( + l_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_159 = torch.nn.functional.dropout(x_158, 0.0, False, False) + x_158 = None + x_160 = x_153 + x_159 + x_153 = x_159 = None + x_161 = torch.rms_norm( + x_160, + (1024,), + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_12_modules_norm1_parameters_weight_ = None + linear_60 = torch._C._nn.linear( + x_161, + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_161 = ( + l_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_24 = linear_60.reshape(1, 256, 3, 8, 128) + linear_60 = None + qkv_12 = reshape_24.permute(2, 0, 3, 1, 4) + reshape_24 = None + unbind_12 = qkv_12.unbind(0) + qkv_12 = None + q_12 = unbind_12[0] + k_12 = unbind_12[1] + v_12 = unbind_12[2] + unbind_12 = None + x_162 = torch._C._nn.scaled_dot_product_attention( + q_12, k_12, v_12, attn_mask=None, dropout_p=0.0 + ) + q_12 = k_12 = v_12 = None + transpose_13 = x_162.transpose(1, 2) + x_162 = None + x_163 = transpose_13.reshape(1, 256, 1024) + transpose_13 = None + x_164 = torch._C._nn.linear( + x_163, + l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_163 = l_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_ = (None) + x_165 = torch.nn.functional.dropout(x_164, 0.0, False, False) + x_164 = None + x_166 = x_160 + x_165 + x_160 = x_165 = None + x_167 = torch.rms_norm( + x_166, + (1024,), + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_, + 
1e-05, + ) + l_self_modules_blocks_modules_12_modules_norm2_parameters_weight_ = None + x_gate_12 = torch._C._nn.linear( + x_167, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_168 = torch._C._nn.linear( + x_167, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_167 = l_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_12 = torch.nn.functional.silu(x_gate_12, inplace=False) + x_gate_12 = None + x_169 = silu_12 * x_168 + silu_12 = x_168 = None + x_170 = torch.nn.functional.dropout(x_169, 0.0, False, False) + x_169 = None + x_171 = torch._C._nn.linear( + x_170, + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_170 = ( + l_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_172 = torch.nn.functional.dropout(x_171, 0.0, False, False) + x_171 = None + x_173 = x_166 + x_172 + x_166 = x_172 = None + x_174 = torch.rms_norm( + x_173, + (1024,), + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_13_modules_norm1_parameters_weight_ = None + linear_65 = torch._C._nn.linear( + x_174, + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_174 = ( + l_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_26 = linear_65.reshape(1, 256, 3, 8, 128) + linear_65 = None + qkv_13 = reshape_26.permute(2, 0, 3, 1, 4) + reshape_26 = None + unbind_13 = qkv_13.unbind(0) + qkv_13 = None + q_13 = unbind_13[0] + k_13 = unbind_13[1] + v_13 = unbind_13[2] + unbind_13 = None + x_175 = torch._C._nn.scaled_dot_product_attention( + q_13, k_13, v_13, attn_mask=None, dropout_p=0.0 + ) + q_13 = k_13 = v_13 = None + transpose_14 = x_175.transpose(1, 2) + x_175 = None + x_176 = transpose_14.reshape(1, 256, 1024) + transpose_14 = None + x_177 = torch._C._nn.linear( + x_176, + l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_176 = l_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_ = (None) + x_178 = torch.nn.functional.dropout(x_177, 0.0, False, False) + x_177 = None + x_179 = x_173 + x_178 + x_173 = x_178 = None + x_180 = torch.rms_norm( + x_179, + (1024,), + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_13_modules_norm2_parameters_weight_ = None + x_gate_13 = torch._C._nn.linear( + x_180, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_181 = torch._C._nn.linear( + x_180, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_180 = l_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_13 = torch.nn.functional.silu(x_gate_13, inplace=False) + x_gate_13 = None + x_182 = silu_13 * x_181 + silu_13 = x_181 = None + x_183 = torch.nn.functional.dropout(x_182, 0.0, False, False) + x_182 = None + x_184 = torch._C._nn.linear( + x_183, + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_183 = ( + l_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_ + ) = None + 
x_185 = torch.nn.functional.dropout(x_184, 0.0, False, False) + x_184 = None + x_186 = x_179 + x_185 + x_179 = x_185 = None + x_187 = torch.rms_norm( + x_186, + (1024,), + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_14_modules_norm1_parameters_weight_ = None + linear_70 = torch._C._nn.linear( + x_187, + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_187 = ( + l_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_28 = linear_70.reshape(1, 256, 3, 8, 128) + linear_70 = None + qkv_14 = reshape_28.permute(2, 0, 3, 1, 4) + reshape_28 = None + unbind_14 = qkv_14.unbind(0) + qkv_14 = None + q_14 = unbind_14[0] + k_14 = unbind_14[1] + v_14 = unbind_14[2] + unbind_14 = None + x_188 = torch._C._nn.scaled_dot_product_attention( + q_14, k_14, v_14, attn_mask=None, dropout_p=0.0 + ) + q_14 = k_14 = v_14 = None + transpose_15 = x_188.transpose(1, 2) + x_188 = None + x_189 = transpose_15.reshape(1, 256, 1024) + transpose_15 = None + x_190 = torch._C._nn.linear( + x_189, + l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_189 = l_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_ = (None) + x_191 = torch.nn.functional.dropout(x_190, 0.0, False, False) + x_190 = None + x_192 = x_186 + x_191 + x_186 = x_191 = None + x_193 = torch.rms_norm( + x_192, + (1024,), + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_14_modules_norm2_parameters_weight_ = None + x_gate_14 = torch._C._nn.linear( + x_193, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_194 = torch._C._nn.linear( + x_193, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_193 = l_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_14 = torch.nn.functional.silu(x_gate_14, inplace=False) + x_gate_14 = None + x_195 = silu_14 * x_194 + silu_14 = x_194 = None + x_196 = torch.nn.functional.dropout(x_195, 0.0, False, False) + x_195 = None + x_197 = torch._C._nn.linear( + x_196, + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_196 = ( + l_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_198 = torch.nn.functional.dropout(x_197, 0.0, False, False) + x_197 = None + x_199 = x_192 + x_198 + x_192 = x_198 = None + x_200 = torch.rms_norm( + x_199, + (1024,), + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_15_modules_norm1_parameters_weight_ = None + linear_75 = torch._C._nn.linear( + x_200, + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_200 = ( + l_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_30 = linear_75.reshape(1, 256, 3, 8, 128) + linear_75 = None + qkv_15 = reshape_30.permute(2, 0, 3, 1, 4) + reshape_30 = None + unbind_15 = qkv_15.unbind(0) + qkv_15 = None + q_15 = unbind_15[0] + k_15 = unbind_15[1] + v_15 = unbind_15[2] + unbind_15 = None + x_201 = torch._C._nn.scaled_dot_product_attention( + q_15, k_15, v_15, attn_mask=None, dropout_p=0.0 + ) + q_15 = k_15 = v_15 = None + transpose_16 = 
x_201.transpose(1, 2) + x_201 = None + x_202 = transpose_16.reshape(1, 256, 1024) + transpose_16 = None + x_203 = torch._C._nn.linear( + x_202, + l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_202 = l_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_ = (None) + x_204 = torch.nn.functional.dropout(x_203, 0.0, False, False) + x_203 = None + x_205 = x_199 + x_204 + x_199 = x_204 = None + x_206 = torch.rms_norm( + x_205, + (1024,), + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_15_modules_norm2_parameters_weight_ = None + x_gate_15 = torch._C._nn.linear( + x_206, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_207 = torch._C._nn.linear( + x_206, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_206 = l_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_15 = torch.nn.functional.silu(x_gate_15, inplace=False) + x_gate_15 = None + x_208 = silu_15 * x_207 + silu_15 = x_207 = None + x_209 = torch.nn.functional.dropout(x_208, 0.0, False, False) + x_208 = None + x_210 = torch._C._nn.linear( + x_209, + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_209 = ( + l_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_211 = torch.nn.functional.dropout(x_210, 0.0, False, False) + x_210 = None + x_212 = x_205 + x_211 + x_205 = x_211 = None + x_213 = torch.rms_norm( + x_212, + (1024,), + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_16_modules_norm1_parameters_weight_ = None + linear_80 = torch._C._nn.linear( + x_213, + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_213 = ( + l_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_32 = linear_80.reshape(1, 256, 3, 8, 128) + linear_80 = None + qkv_16 = reshape_32.permute(2, 0, 3, 1, 4) + reshape_32 = None + unbind_16 = qkv_16.unbind(0) + qkv_16 = None + q_16 = unbind_16[0] + k_16 = unbind_16[1] + v_16 = unbind_16[2] + unbind_16 = None + x_214 = torch._C._nn.scaled_dot_product_attention( + q_16, k_16, v_16, attn_mask=None, dropout_p=0.0 + ) + q_16 = k_16 = v_16 = None + transpose_17 = x_214.transpose(1, 2) + x_214 = None + x_215 = transpose_17.reshape(1, 256, 1024) + transpose_17 = None + x_216 = torch._C._nn.linear( + x_215, + l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_215 = l_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_ = (None) + x_217 = torch.nn.functional.dropout(x_216, 0.0, False, False) + x_216 = None + x_218 = x_212 + x_217 + x_212 = x_217 = None + x_219 = torch.rms_norm( + x_218, + (1024,), + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_16_modules_norm2_parameters_weight_ = None + x_gate_16 = torch._C._nn.linear( + x_219, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_220 = torch._C._nn.linear( + x_219, + 
l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_219 = l_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_16 = torch.nn.functional.silu(x_gate_16, inplace=False) + x_gate_16 = None + x_221 = silu_16 * x_220 + silu_16 = x_220 = None + x_222 = torch.nn.functional.dropout(x_221, 0.0, False, False) + x_221 = None + x_223 = torch._C._nn.linear( + x_222, + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_222 = ( + l_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_224 = torch.nn.functional.dropout(x_223, 0.0, False, False) + x_223 = None + x_225 = x_218 + x_224 + x_218 = x_224 = None + x_226 = torch.rms_norm( + x_225, + (1024,), + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_17_modules_norm1_parameters_weight_ = None + linear_85 = torch._C._nn.linear( + x_226, + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_226 = ( + l_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_34 = linear_85.reshape(1, 256, 3, 8, 128) + linear_85 = None + qkv_17 = reshape_34.permute(2, 0, 3, 1, 4) + reshape_34 = None + unbind_17 = qkv_17.unbind(0) + qkv_17 = None + q_17 = unbind_17[0] + k_17 = unbind_17[1] + v_17 = unbind_17[2] + unbind_17 = None + x_227 = torch._C._nn.scaled_dot_product_attention( + q_17, k_17, v_17, attn_mask=None, dropout_p=0.0 + ) + q_17 = k_17 = v_17 = None + transpose_18 = x_227.transpose(1, 2) + x_227 = None + x_228 = transpose_18.reshape(1, 256, 1024) + transpose_18 = None + x_229 = torch._C._nn.linear( + x_228, + l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_228 = l_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_ = (None) + x_230 = torch.nn.functional.dropout(x_229, 0.0, False, False) + x_229 = None + x_231 = x_225 + x_230 + x_225 = x_230 = None + x_232 = torch.rms_norm( + x_231, + (1024,), + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_17_modules_norm2_parameters_weight_ = None + x_gate_17 = torch._C._nn.linear( + x_232, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_233 = torch._C._nn.linear( + x_232, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_232 = l_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_17 = torch.nn.functional.silu(x_gate_17, inplace=False) + x_gate_17 = None + x_234 = silu_17 * x_233 + silu_17 = x_233 = None + x_235 = torch.nn.functional.dropout(x_234, 0.0, False, False) + x_234 = None + x_236 = torch._C._nn.linear( + x_235, + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_235 = ( + l_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_237 = torch.nn.functional.dropout(x_236, 0.0, False, False) + x_236 = None + x_238 = x_231 + x_237 + x_231 = x_237 = None + x_239 = torch.rms_norm( + x_238, + (1024,), + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_18_modules_norm1_parameters_weight_ = None + linear_90 = torch._C._nn.linear( + 
x_239, + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_239 = ( + l_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_36 = linear_90.reshape(1, 256, 3, 8, 128) + linear_90 = None + qkv_18 = reshape_36.permute(2, 0, 3, 1, 4) + reshape_36 = None + unbind_18 = qkv_18.unbind(0) + qkv_18 = None + q_18 = unbind_18[0] + k_18 = unbind_18[1] + v_18 = unbind_18[2] + unbind_18 = None + x_240 = torch._C._nn.scaled_dot_product_attention( + q_18, k_18, v_18, attn_mask=None, dropout_p=0.0 + ) + q_18 = k_18 = v_18 = None + transpose_19 = x_240.transpose(1, 2) + x_240 = None + x_241 = transpose_19.reshape(1, 256, 1024) + transpose_19 = None + x_242 = torch._C._nn.linear( + x_241, + l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_241 = l_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_ = (None) + x_243 = torch.nn.functional.dropout(x_242, 0.0, False, False) + x_242 = None + x_244 = x_238 + x_243 + x_238 = x_243 = None + x_245 = torch.rms_norm( + x_244, + (1024,), + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_18_modules_norm2_parameters_weight_ = None + x_gate_18 = torch._C._nn.linear( + x_245, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_246 = torch._C._nn.linear( + x_245, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_245 = l_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_18 = torch.nn.functional.silu(x_gate_18, inplace=False) + x_gate_18 = None + x_247 = silu_18 * x_246 + silu_18 = x_246 = None + x_248 = torch.nn.functional.dropout(x_247, 0.0, False, False) + x_247 = None + x_249 = torch._C._nn.linear( + x_248, + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_248 = ( + l_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_250 = torch.nn.functional.dropout(x_249, 0.0, False, False) + x_249 = None + x_251 = x_244 + x_250 + x_244 = x_250 = None + x_252 = torch.rms_norm( + x_251, + (1024,), + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_19_modules_norm1_parameters_weight_ = None + linear_95 = torch._C._nn.linear( + x_252, + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_252 = ( + l_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_38 = linear_95.reshape(1, 256, 3, 8, 128) + linear_95 = None + qkv_19 = reshape_38.permute(2, 0, 3, 1, 4) + reshape_38 = None + unbind_19 = qkv_19.unbind(0) + qkv_19 = None + q_19 = unbind_19[0] + k_19 = unbind_19[1] + v_19 = unbind_19[2] + unbind_19 = None + x_253 = torch._C._nn.scaled_dot_product_attention( + q_19, k_19, v_19, attn_mask=None, dropout_p=0.0 + ) + q_19 = k_19 = v_19 = None + transpose_20 = x_253.transpose(1, 2) + x_253 = None + x_254 = transpose_20.reshape(1, 256, 1024) + transpose_20 = None + x_255 = torch._C._nn.linear( + x_254, + l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_254 = l_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_ = (None) + x_256 = 
torch.nn.functional.dropout(x_255, 0.0, False, False) + x_255 = None + x_257 = x_251 + x_256 + x_251 = x_256 = None + x_258 = torch.rms_norm( + x_257, + (1024,), + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_19_modules_norm2_parameters_weight_ = None + x_gate_19 = torch._C._nn.linear( + x_258, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_259 = torch._C._nn.linear( + x_258, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_258 = l_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_19 = torch.nn.functional.silu(x_gate_19, inplace=False) + x_gate_19 = None + x_260 = silu_19 * x_259 + silu_19 = x_259 = None + x_261 = torch.nn.functional.dropout(x_260, 0.0, False, False) + x_260 = None + x_262 = torch._C._nn.linear( + x_261, + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_261 = ( + l_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_263 = torch.nn.functional.dropout(x_262, 0.0, False, False) + x_262 = None + x_264 = x_257 + x_263 + x_257 = x_263 = None + x_265 = torch.rms_norm( + x_264, + (1024,), + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_20_modules_norm1_parameters_weight_ = None + linear_100 = torch._C._nn.linear( + x_265, + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_265 = ( + l_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_40 = linear_100.reshape(1, 256, 3, 8, 128) + linear_100 = None + qkv_20 = reshape_40.permute(2, 0, 3, 1, 4) + reshape_40 = None + unbind_20 = qkv_20.unbind(0) + qkv_20 = None + q_20 = unbind_20[0] + k_20 = unbind_20[1] + v_20 = unbind_20[2] + unbind_20 = None + x_266 = torch._C._nn.scaled_dot_product_attention( + q_20, k_20, v_20, attn_mask=None, dropout_p=0.0 + ) + q_20 = k_20 = v_20 = None + transpose_21 = x_266.transpose(1, 2) + x_266 = None + x_267 = transpose_21.reshape(1, 256, 1024) + transpose_21 = None + x_268 = torch._C._nn.linear( + x_267, + l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_267 = l_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_ = (None) + x_269 = torch.nn.functional.dropout(x_268, 0.0, False, False) + x_268 = None + x_270 = x_264 + x_269 + x_264 = x_269 = None + x_271 = torch.rms_norm( + x_270, + (1024,), + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_20_modules_norm2_parameters_weight_ = None + x_gate_20 = torch._C._nn.linear( + x_271, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_272 = torch._C._nn.linear( + x_271, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_271 = l_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_20 = torch.nn.functional.silu(x_gate_20, inplace=False) + x_gate_20 = None + x_273 = silu_20 * x_272 + silu_20 = x_272 = None + x_274 = torch.nn.functional.dropout(x_273, 0.0, False, False) + x_273 = None + 
x_275 = torch._C._nn.linear( + x_274, + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_274 = ( + l_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_276 = torch.nn.functional.dropout(x_275, 0.0, False, False) + x_275 = None + x_277 = x_270 + x_276 + x_270 = x_276 = None + x_278 = torch.rms_norm( + x_277, + (1024,), + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_21_modules_norm1_parameters_weight_ = None + linear_105 = torch._C._nn.linear( + x_278, + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_278 = ( + l_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_42 = linear_105.reshape(1, 256, 3, 8, 128) + linear_105 = None + qkv_21 = reshape_42.permute(2, 0, 3, 1, 4) + reshape_42 = None + unbind_21 = qkv_21.unbind(0) + qkv_21 = None + q_21 = unbind_21[0] + k_21 = unbind_21[1] + v_21 = unbind_21[2] + unbind_21 = None + x_279 = torch._C._nn.scaled_dot_product_attention( + q_21, k_21, v_21, attn_mask=None, dropout_p=0.0 + ) + q_21 = k_21 = v_21 = None + transpose_22 = x_279.transpose(1, 2) + x_279 = None + x_280 = transpose_22.reshape(1, 256, 1024) + transpose_22 = None + x_281 = torch._C._nn.linear( + x_280, + l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_280 = l_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_ = (None) + x_282 = torch.nn.functional.dropout(x_281, 0.0, False, False) + x_281 = None + x_283 = x_277 + x_282 + x_277 = x_282 = None + x_284 = torch.rms_norm( + x_283, + (1024,), + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_21_modules_norm2_parameters_weight_ = None + x_gate_21 = torch._C._nn.linear( + x_284, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_285 = torch._C._nn.linear( + x_284, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_284 = l_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_21 = torch.nn.functional.silu(x_gate_21, inplace=False) + x_gate_21 = None + x_286 = silu_21 * x_285 + silu_21 = x_285 = None + x_287 = torch.nn.functional.dropout(x_286, 0.0, False, False) + x_286 = None + x_288 = torch._C._nn.linear( + x_287, + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_287 = ( + l_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_289 = torch.nn.functional.dropout(x_288, 0.0, False, False) + x_288 = None + x_290 = x_283 + x_289 + x_283 = x_289 = None + x_291 = torch.rms_norm( + x_290, + (1024,), + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_22_modules_norm1_parameters_weight_ = None + linear_110 = torch._C._nn.linear( + x_291, + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_291 = ( + l_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_44 = linear_110.reshape(1, 256, 3, 8, 128) + linear_110 = None + qkv_22 = reshape_44.permute(2, 0, 3, 1, 4) + reshape_44 = None + unbind_22 = qkv_22.unbind(0) + qkv_22 = None + q_22 = 
unbind_22[0] + k_22 = unbind_22[1] + v_22 = unbind_22[2] + unbind_22 = None + x_292 = torch._C._nn.scaled_dot_product_attention( + q_22, k_22, v_22, attn_mask=None, dropout_p=0.0 + ) + q_22 = k_22 = v_22 = None + transpose_23 = x_292.transpose(1, 2) + x_292 = None + x_293 = transpose_23.reshape(1, 256, 1024) + transpose_23 = None + x_294 = torch._C._nn.linear( + x_293, + l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_293 = l_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_ = (None) + x_295 = torch.nn.functional.dropout(x_294, 0.0, False, False) + x_294 = None + x_296 = x_290 + x_295 + x_290 = x_295 = None + x_297 = torch.rms_norm( + x_296, + (1024,), + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_22_modules_norm2_parameters_weight_ = None + x_gate_22 = torch._C._nn.linear( + x_297, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_, + None, + ) + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_ = ( + None + ) + x_298 = torch._C._nn.linear( + x_297, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_, + None, + ) + x_297 = l_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_ = (None) + silu_22 = torch.nn.functional.silu(x_gate_22, inplace=False) + x_gate_22 = None + x_299 = silu_22 * x_298 + silu_22 = x_298 = None + x_300 = torch.nn.functional.dropout(x_299, 0.0, False, False) + x_299 = None + x_301 = torch._C._nn.linear( + x_300, + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_, + None, + ) + x_300 = ( + l_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_ + ) = None + x_302 = torch.nn.functional.dropout(x_301, 0.0, False, False) + x_301 = None + x_303 = x_296 + x_302 + x_296 = x_302 = None + x_304 = torch.rms_norm( + x_303, + (1024,), + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_23_modules_norm1_parameters_weight_ = None + linear_115 = torch._C._nn.linear( + x_304, + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_, + None, + ) + x_304 = ( + l_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_ + ) = None + reshape_46 = linear_115.reshape(1, 256, 3, 8, 128) + linear_115 = None + qkv_23 = reshape_46.permute(2, 0, 3, 1, 4) + reshape_46 = None + unbind_23 = qkv_23.unbind(0) + qkv_23 = None + q_23 = unbind_23[0] + k_23 = unbind_23[1] + v_23 = unbind_23[2] + unbind_23 = None + x_305 = torch._C._nn.scaled_dot_product_attention( + q_23, k_23, v_23, attn_mask=None, dropout_p=0.0 + ) + q_23 = k_23 = v_23 = None + transpose_24 = x_305.transpose(1, 2) + x_305 = None + x_306 = transpose_24.reshape(1, 256, 1024) + transpose_24 = None + x_307 = torch._C._nn.linear( + x_306, + l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_, + None, + ) + x_306 = l_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_ = (None) + x_308 = torch.nn.functional.dropout(x_307, 0.0, False, False) + x_307 = None + x_309 = x_303 + x_308 + x_303 = x_308 = None + x_310 = torch.rms_norm( + x_309, + (1024,), + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_, + 1e-05, + ) + l_self_modules_blocks_modules_23_modules_norm2_parameters_weight_ = None + x_gate_23 = torch._C._nn.linear( + x_310, + 
            l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_,
+            None,
+        )
+        l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_ = (
+            None
+        )
+        x_311 = torch._C._nn.linear(
+            x_310,
+            l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_,
+            None,
+        )
+        x_310 = (
+            l_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_
+        ) = None
+        silu_23 = torch.nn.functional.silu(x_gate_23, inplace=False)
+        x_gate_23 = None
+        x_312 = silu_23 * x_311
+        silu_23 = x_311 = None
+        x_313 = torch.nn.functional.dropout(x_312, 0.0, False, False)
+        x_312 = None
+        x_314 = torch._C._nn.linear(
+            x_313,
+            l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_,
+            None,
+        )
+        x_313 = (
+            l_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_
+        ) = None
+        x_315 = torch.nn.functional.dropout(x_314, 0.0, False, False)
+        x_314 = None
+        x_316 = x_309 + x_315
+        x_309 = x_315 = None
+        x_317 = torch.rms_norm(
+            x_316, (1024,), l_self_modules_norm_parameters_weight_, 1e-05
+        )
+        x_316 = l_self_modules_norm_parameters_weight_ = None
+        x_318 = x_317[
+            (slice(None, None, None), slice(l_self_num_prefix_tokens, None, None))
+        ]
+        x_317 = l_self_num_prefix_tokens = None
+        x_319 = x_318.mean(dim=1)
+        x_318 = None
+        x_320 = torch.nn.functional.dropout(x_319, 0.0, False, False)
+        x_319 = None
+        return (x_320,)
diff --git a/samples/timm/aimv2_large_patch14_224.apple_pt/weight_meta.py b/samples/timm/aimv2_large_patch14_224.apple_pt/weight_meta.py
new file mode 100644
index 000000000..0e5483bc5
--- /dev/null
+++ b/samples/timm/aimv2_large_patch14_224.apple_pt/weight_meta.py
@@ -0,0 +1,1920 @@
+class Program_weight_tensor_meta_L_x_:
+    name = "L_x_"
+    shape = [1, 3, 224, 224]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.222
+    std = 1.285
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_weight_:
+    name = "L_self_modules_patch_embed_modules_proj_parameters_weight_"
+    shape = [1024, 3, 14, 14]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.024
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_bias_:
+    name = "L_self_modules_patch_embed_modules_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.024
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_patch_embed_modules_norm_parameters_weight_:
+    name = "L_self_modules_patch_embed_modules_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_parameters_pos_embed_:
+    name = "L_self_parameters_pos_embed_"
+    shape = [1, 256, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_:
+    name = "L_self_modules_blocks_modules_0_modules_norm1_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_:
+    name = "L_self_modules_blocks_modules_0_modules_attn_modules_qkv_parameters_weight_"
+    shape = [3072, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_0_modules_attn_modules_proj_parameters_weight_"
+    )
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_:
+    name = "L_self_modules_blocks_modules_0_modules_norm2_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_g_parameters_weight_"
+    )
+    shape = [2816, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_0_modules_mlp_modules_fc1_x_parameters_weight_"
+    )
+    shape = [2816, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_:
+    name = "L_self_modules_blocks_modules_0_modules_mlp_modules_fc2_parameters_weight_"
+    shape = [1024, 2816]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_:
+    name = "L_self_modules_blocks_modules_1_modules_norm1_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_:
+    name = "L_self_modules_blocks_modules_1_modules_attn_modules_qkv_parameters_weight_"
+    shape = [3072, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_1_modules_attn_modules_proj_parameters_weight_"
+    )
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_:
+    name = "L_self_modules_blocks_modules_1_modules_norm2_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_g_parameters_weight_"
+    )
+    shape = [2816, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_1_modules_mlp_modules_fc1_x_parameters_weight_"
+    )
+    shape = [2816, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_:
+    name = "L_self_modules_blocks_modules_1_modules_mlp_modules_fc2_parameters_weight_"
+    shape = [1024, 2816]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_:
+    name = "L_self_modules_blocks_modules_2_modules_norm1_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_:
+    name = "L_self_modules_blocks_modules_2_modules_attn_modules_qkv_parameters_weight_"
+    shape = [3072, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_2_modules_attn_modules_proj_parameters_weight_"
+    )
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_:
+    name = "L_self_modules_blocks_modules_2_modules_norm2_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_g_parameters_weight_"
+    )
+    shape = [2816, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_2_modules_mlp_modules_fc1_x_parameters_weight_"
+    )
+    shape = [2816, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_:
+    name = "L_self_modules_blocks_modules_2_modules_mlp_modules_fc2_parameters_weight_"
+    shape = [1024, 2816]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_:
+    name = "L_self_modules_blocks_modules_3_modules_norm1_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 0.000
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_:
+    name = "L_self_modules_blocks_modules_3_modules_attn_modules_qkv_parameters_weight_"
+    shape = [3072, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_:
+    name = (
+        "L_self_modules_blocks_modules_3_modules_attn_modules_proj_parameters_weight_"
+    )
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.020
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_:
+    name = "L_self_modules_blocks_modules_3_modules_norm2_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 1.000
+    std = 
0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_3_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_3_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_attn_modules_qkv_parameters_weight_" + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_4_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_4_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_attn_modules_qkv_parameters_weight_" + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_5_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_5_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_attn_modules_qkv_parameters_weight_" + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_6_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_6_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_attn_modules_qkv_parameters_weight_" + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_7_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_7_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_attn_modules_qkv_parameters_weight_" + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = 
"cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_8_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_8_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_attn_modules_qkv_parameters_weight_" + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_9_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_9_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_10_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_10_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + 
"L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_11_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_11_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_12_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_12_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + 
dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_13_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_13_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_14_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + 
+class Program_weight_tensor_meta_L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_14_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_15_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_15_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_: + 
name = "L_self_modules_blocks_modules_16_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_16_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_16_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_17_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_17_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_norm1_parameters_weight_" + shape = [1024] + dtype = 
"torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_18_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_18_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_19_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_19_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_20_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_20_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_: 
+ name = ( + "L_self_modules_blocks_modules_21_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_21_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_21_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_22_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_22_modules_mlp_modules_fc2_parameters_weight_" + shape 
= [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_norm1_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_attn_modules_qkv_parameters_weight_" + ) + shape = [3072, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_attn_modules_proj_parameters_weight_" + ) + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_norm2_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_g_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_: + name = ( + "L_self_modules_blocks_modules_23_modules_mlp_modules_fc1_x_parameters_weight_" + ) + shape = [2816, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_: + name = "L_self_modules_blocks_modules_23_modules_mlp_modules_fc2_parameters_weight_" + shape = [1024, 2816] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_norm_parameters_weight_: + name = "L_self_modules_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.000 + std = 0.000 + data = None + + +class Program_weight_tensor_meta_L_self_num_prefix_tokens: + name = "L_self_num_prefix_tokens" + shape = [] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + data = [4]
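
Note: the weight_meta.py added above stores only per-tensor statistics (shape, dtype, device, mean, std), not the weights themselves, except for scalar records such as L_self_num_prefix_tokens, which carry their exact value in data. A minimal sketch of how a harness might rebuild stand-in tensors from these records follows; the Meta class mirrors one record from the diff, while materialize is a hypothetical helper (not part of this change), and the idea that inputs are regenerated from N(mean, std) is an assumption about the benchmark workflow, not something the diff states.

import torch


class Meta:
    # Mirrors Program_weight_tensor_meta_L_self_modules_patch_embed_modules_proj_parameters_weight_
    # from the weight_meta.py hunk above.
    name = "L_self_modules_patch_embed_modules_proj_parameters_weight_"
    shape = [1024, 3, 14, 14]
    dtype = "torch.float32"
    device = "cuda:0"
    mean = -0.000
    std = 0.024
    data = None


def materialize(meta):
    """Rebuild a tensor from a Program_weight_tensor_meta_* record (hypothetical helper).

    Records with explicit values (data is not None, e.g. the scalar
    L_self_num_prefix_tokens with shape [] and data [4]) are reconstructed
    exactly; all other records are filled from N(mean, std) at the stored
    shape/dtype, which reproduces the statistics but not the trained weights.
    """
    dtype = getattr(torch, meta.dtype.split(".")[-1])  # "torch.float32" -> torch.float32
    device = meta.device if torch.cuda.is_available() else "cpu"
    if meta.data is not None:
        return torch.tensor(meta.data, dtype=dtype, device=device).reshape(meta.shape)
    t = torch.empty(meta.shape, dtype=dtype, device=device)
    return t.normal_(mean=meta.mean, std=meta.std)  # std == 0.0 (the norm weights) gives a constant fill


w = materialize(Meta)
print(w.shape, w.dtype)  # torch.Size([1024, 3, 14, 14]) torch.float32

Tensors built this way match the recorded shapes and dtypes, so they are sufficient to drive GraphModule.forward for shape/compile checks, but not to reproduce the pretrained model's outputs.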