diff --git a/samples/torchaudio/hubert_large/graph_hash.txt b/samples/torchaudio/hubert_large/graph_hash.txt
new file mode 100644
index 000000000..92309c892
--- /dev/null
+++ b/samples/torchaudio/hubert_large/graph_hash.txt
@@ -0,0 +1 @@
+81d946a34e1f67d7dd046f23bbc6f39ce94605f382b28a11bba6849b33300b93
\ No newline at end of file
diff --git a/samples/torchaudio/hubert_large/graph_net.json b/samples/torchaudio/hubert_large/graph_net.json
new file mode 100644
index 000000000..1373fe3b5
--- /dev/null
+++ b/samples/torchaudio/hubert_large/graph_net.json
@@ -0,0 +1,5 @@
+{
+    "framework": "torch",
+    "num_devices_required": 1,
+    "num_nodes_required": 1
+}
\ No newline at end of file
diff --git a/samples/torchaudio/hubert_large/input_meta.py b/samples/torchaudio/hubert_large/input_meta.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/samples/torchaudio/hubert_large/input_tensor_constraints.py b/samples/torchaudio/hubert_large/input_tensor_constraints.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/samples/torchaudio/hubert_large/model.py b/samples/torchaudio/hubert_large/model.py
new file mode 100644
index 000000000..db194f96e
--- /dev/null
+++ b/samples/torchaudio/hubert_large/model.py
@@ -0,0 +1,3081 @@
+import torch
+
+
+class GraphModule(torch.nn.Module):
+    def forward(
+        self,
+        L_waveforms_: torch.Tensor,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_conv_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_conv_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_conv_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_conv_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_conv_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_conv_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_conv_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original0_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original1_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter,
+        L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter,
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_bias_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_weight_: torch.nn.parameter.Parameter, + L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_bias_: torch.nn.parameter.Parameter, + ): + l_waveforms_ = L_waveforms_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_conv_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_conv_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_weight_ = 
L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_conv_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_conv_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_conv_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_conv_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_conv_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_conv_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_conv_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_conv_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_conv_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_conv_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_weight_ + 
l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_conv_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_conv_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_weight_ = L_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_weight_ + l_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_bias_ = L_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original0_ = L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original0_ + l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original1_ = L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original1_ + l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_bias_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_weight_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_weight_ 
+ l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_weight_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_weight_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_weight_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_bias_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_weight_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_weight_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_bias_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_weight_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_weight_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_bias_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_weight_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_bias_ + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_weight_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_bias_ = 
L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_bias_ + 
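+ # (editor's note) The aliasing ends just below with layer 23's output projection,
+ # final layer_norm, and feed-forward parameters, plus the transformer's top-level
+ # layer_norm. Layer 23 is the last of the 24 encoder layers; immediately after it,
+ # the graph switches from parameter plumbing to the actual computation on the
+ # input waveform.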
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_bias_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_weight_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_weight_ + l_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_bias_ = L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_bias_ + waveforms = torch.nn.functional.layer_norm(l_waveforms_, (1, 80000)) + l_waveforms_ = None + x = waveforms.unsqueeze(1) + waveforms = None + x_1 = torch.conv1d( + x, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_conv_parameters_weight_, + None, + (5,), + (0,), + (1,), + 1, + ) + x = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_conv_parameters_weight_ = (None) + x_2 = x_1.transpose(-2, -1) + x_1 = None + x_3 = torch.nn.functional.layer_norm( + x_2, + (512,), + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_2 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_weight_ = 
l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_bias_ = (None) + x_4 = x_3.transpose(-2, -1) + x_3 = None + x_5 = torch._C._nn.gelu(x_4) + x_4 = None + x_6 = torch.conv1d( + x_5, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_conv_parameters_weight_, + None, + (2,), + (0,), + (1,), + 1, + ) + x_5 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_conv_parameters_weight_ = (None) + x_7 = x_6.transpose(-2, -1) + x_6 = None + x_8 = torch.nn.functional.layer_norm( + x_7, + (512,), + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_7 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_bias_ = (None) + x_9 = x_8.transpose(-2, -1) + x_8 = None + x_10 = torch._C._nn.gelu(x_9) + x_9 = None + x_11 = torch.conv1d( + x_10, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_conv_parameters_weight_, + None, + (2,), + (0,), + (1,), + 1, + ) + x_10 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_conv_parameters_weight_ = (None) + x_12 = x_11.transpose(-2, -1) + x_11 = None + x_13 = torch.nn.functional.layer_norm( + x_12, + (512,), + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_12 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_bias_ = (None) + x_14 = x_13.transpose(-2, -1) + x_13 = None + x_15 = torch._C._nn.gelu(x_14) + x_14 = None + x_16 = torch.conv1d( + x_15, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_conv_parameters_weight_, + None, + (2,), + (0,), + (1,), + 1, + ) + x_15 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_conv_parameters_weight_ = (None) + x_17 = x_16.transpose(-2, -1) + x_16 = None + x_18 = torch.nn.functional.layer_norm( + x_17, + (512,), + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_17 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_bias_ = (None) + x_19 = x_18.transpose(-2, -1) + x_18 = None + x_20 = torch._C._nn.gelu(x_19) + x_19 = None + x_21 = torch.conv1d( + x_20, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_conv_parameters_weight_, + None, + (2,), + (0,), + (1,), + 1, + ) + x_20 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_conv_parameters_weight_ = (None) + x_22 = x_21.transpose(-2, -1) + 
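# This Conv1d -> transpose -> LayerNorm -> transpose -> GELU block repeats for feature-extractor blocks 4-6 (stride 2 each; block 0 used stride 5); together the seven blocks reduce the (1, 80000) waveform (likely 5 s at 16 kHz) to 249 frames of width 512. +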
x_21 = None + x_23 = torch.nn.functional.layer_norm( + x_22, + (512,), + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_22 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_bias_ = (None) + x_24 = x_23.transpose(-2, -1) + x_23 = None + x_25 = torch._C._nn.gelu(x_24) + x_24 = None + x_26 = torch.conv1d( + x_25, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_conv_parameters_weight_, + None, + (2,), + (0,), + (1,), + 1, + ) + x_25 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_conv_parameters_weight_ = (None) + x_27 = x_26.transpose(-2, -1) + x_26 = None + x_28 = torch.nn.functional.layer_norm( + x_27, + (512,), + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_27 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_bias_ = (None) + x_29 = x_28.transpose(-2, -1) + x_28 = None + x_30 = torch._C._nn.gelu(x_29) + x_29 = None + x_31 = torch.conv1d( + x_30, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_conv_parameters_weight_, + None, + (2,), + (0,), + (1,), + 1, + ) + x_30 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_conv_parameters_weight_ = (None) + x_32 = x_31.transpose(-2, -1) + x_31 = None + x_33 = torch.nn.functional.layer_norm( + x_32, + (512,), + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_32 = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_bias_ = (None) + x_34 = x_33.transpose(-2, -1) + x_33 = None + x_35 = torch._C._nn.gelu(x_34) + x_34 = None + x_36 = x_35.transpose(1, 2) + x_35 = None + x_37 = torch.nn.functional.layer_norm( + x_36, + (512,), + l_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_36 = l_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_bias_ = (None) + x_38 = torch._C._nn.linear( + x_37, + l_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_weight_, + l_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_bias_, + ) + x_37 = 
l_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_weight_ = l_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_bias_ = (None) + x_39 = torch.nn.functional.dropout(x_38, 0.0, False, False) + x_38 = None + x_40 = x_39.transpose(-2, -1) + x_41 = torch._weight_norm( + l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original1_, + l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original0_, + 2, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original1_ = l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original0_ = (None) + x_42 = torch.conv1d( + x_40, + x_41, + l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_parameters_bias_, + (1,), + (64,), + (1,), + 16, + ) + x_40 = ( + x_41 + ) = l_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_parameters_bias_ = (None) + x_43 = x_42[(Ellipsis, slice(None, -1, None))] + x_42 = None + x_44 = torch._C._nn.gelu(x_43) + x_43 = None + x_45 = x_44.transpose(-2, -1) + x_44 = None + x_46 = x_39 + x_45 + x_39 = x_45 = None + x_47 = torch.nn.functional.dropout(x_46, 0.0, False, False) + x_46 = None + x_48 = torch.nn.functional.layer_norm( + x_47, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_bias_ = (None) + linear_1 = torch._C._nn.linear( + x_48, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_bias_ = (None) + view = linear_1.view(1, 249, 16, 64) + linear_1 = None + q = view.transpose(2, 1) + view = None + linear_2 = torch._C._nn.linear( + x_48, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_1 = linear_2.view(1, 249, 16, 64) + linear_2 = None + k = view_1.transpose(2, 1) + 
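# The v projection below completes q/k/v for layer 0's self-attention; each is viewed as (1, 249, 16, 64) and transposed to (batch, heads=16, frames=249, head_dim=64) before scaled_dot_product_attention (16 heads * 64 = the 1024-dim hidden state). +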
view_1 = None + linear_3 = torch._C._nn.linear( + x_48, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_bias_, + ) + x_48 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_2 = linear_3.view(1, 249, 16, 64) + linear_3 = None + v = view_2.transpose(2, 1) + view_2 = None + attn_output = torch._C._nn.scaled_dot_product_attention( + q, k, v, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q = k = v = None + transpose_20 = attn_output.transpose(1, 2) + attn_output = None + attn_output_1 = transpose_20.reshape(1, -1, 1024) + transpose_20 = None + output = torch._C._nn.linear( + attn_output_1, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_1 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_49 = torch.nn.functional.dropout(output, 0.0, False, False) + output = None + x_50 = x_47 + x_49 + x_47 = x_49 = None + layer_norm_10 = torch.nn.functional.layer_norm( + x_50, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_bias_ = (None) + x_51 = torch._C._nn.linear( + layer_norm_10, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_10 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_52 = torch._C._nn.gelu(x_51) + x_51 = None + x_53 = torch.nn.functional.dropout(x_52, 0.0, False, False) + x_52 = None + x_54 = torch._C._nn.linear( + x_53, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_53 = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_55 = torch.nn.functional.dropout(x_54, 0.0, False, False) + x_54 = None + x_56 = x_50 + x_55 + x_50 = x_55 = None + x_57 = torch.nn.functional.layer_norm( + x_56, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_bias_ = (None) + linear_7 = torch._C._nn.linear( + x_57, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_3 = linear_7.view(1, 249, 16, 64) + linear_7 = None + q_1 = view_3.transpose(2, 1) + view_3 = None + linear_8 = torch._C._nn.linear( + x_57, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_4 = linear_8.view(1, 249, 16, 64) + linear_8 = None + k_1 = view_4.transpose(2, 1) + view_4 = None + linear_9 = torch._C._nn.linear( + x_57, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_bias_, + ) + x_57 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_5 = linear_9.view(1, 249, 16, 64) + linear_9 = None + v_1 = view_5.transpose(2, 1) + view_5 = None + attn_output_2 = torch._C._nn.scaled_dot_product_attention( + q_1, k_1, v_1, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_1 = k_1 = v_1 = None + transpose_24 = attn_output_2.transpose(1, 2) + attn_output_2 = None + attn_output_3 = transpose_24.reshape(1, -1, 1024) + transpose_24 = None + output_1 = torch._C._nn.linear( + attn_output_3, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_3 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_58 = torch.nn.functional.dropout(output_1, 0.0, False, False) + output_1 = None + x_59 = x_56 + x_58 + x_56 = x_58 = None + layer_norm_12 = torch.nn.functional.layer_norm( + x_59, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_bias_ = (None) + x_60 = torch._C._nn.linear( + layer_norm_12, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_12 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_61 = torch._C._nn.gelu(x_60) + x_60 = None + x_62 = torch.nn.functional.dropout(x_61, 0.0, False, False) + x_61 = None + x_63 = torch._C._nn.linear( + x_62, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_62 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_64 = torch.nn.functional.dropout(x_63, 0.0, False, False) + x_63 = None + x_65 = x_59 + x_64 + x_59 = x_64 = None + x_66 = torch.nn.functional.layer_norm( + x_65, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_bias_ = (None) + linear_13 = torch._C._nn.linear( + x_66, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_6 = linear_13.view(1, 249, 16, 64) + linear_13 = None + q_2 = view_6.transpose(2, 1) + view_6 = None + linear_14 = torch._C._nn.linear( + x_66, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_7 = linear_14.view(1, 249, 16, 64) + linear_14 = None + k_2 = view_7.transpose(2, 1) + view_7 = None + linear_15 = torch._C._nn.linear( + x_66, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_bias_, + ) + x_66 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_8 = linear_15.view(1, 249, 16, 64) + linear_15 = None + v_2 = view_8.transpose(2, 1) + view_8 = None + attn_output_4 = torch._C._nn.scaled_dot_product_attention( + q_2, k_2, v_2, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_2 = k_2 = v_2 = None + transpose_28 = attn_output_4.transpose(1, 2) + attn_output_4 = None + attn_output_5 = transpose_28.reshape(1, -1, 1024) + transpose_28 = None + output_2 = torch._C._nn.linear( + attn_output_5, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_5 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_67 = torch.nn.functional.dropout(output_2, 0.0, False, False) + output_2 = None + x_68 = x_65 + x_67 + x_65 = x_67 = None + layer_norm_14 = torch.nn.functional.layer_norm( + x_68, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + 
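# From here each encoder layer repeats the pre-norm recipe of layers 0-1: x = x + out_proj(SDPA(q, k, v)) computed over LayerNorm(x), then x = x + output_dense(GELU(intermediate_dense(LayerNorm(x)))); every dropout call uses p=0.0 (inference). Layers 3-23 below are verbatim copies with their own parameters. +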
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_bias_ = (None) + x_69 = torch._C._nn.linear( + layer_norm_14, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_14 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_70 = torch._C._nn.gelu(x_69) + x_69 = None + x_71 = torch.nn.functional.dropout(x_70, 0.0, False, False) + x_70 = None + x_72 = torch._C._nn.linear( + x_71, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_71 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_73 = torch.nn.functional.dropout(x_72, 0.0, False, False) + x_72 = None + x_74 = x_68 + x_73 + x_68 = x_73 = None + x_75 = torch.nn.functional.layer_norm( + x_74, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_bias_ = (None) + linear_19 = torch._C._nn.linear( + x_75, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_9 = linear_19.view(1, 249, 16, 64) + linear_19 = None + q_3 = view_9.transpose(2, 1) + view_9 = None + linear_20 = torch._C._nn.linear( + x_75, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_weight_ = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_10 = linear_20.view(1, 249, 16, 64) + linear_20 = None + k_3 = view_10.transpose(2, 1) + view_10 = None + linear_21 = torch._C._nn.linear( + x_75, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_bias_, + ) + x_75 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_11 = linear_21.view(1, 249, 16, 64) + linear_21 = None + v_3 = view_11.transpose(2, 1) + view_11 = None + attn_output_6 = torch._C._nn.scaled_dot_product_attention( + q_3, k_3, v_3, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_3 = k_3 = v_3 = None + transpose_32 = attn_output_6.transpose(1, 2) + attn_output_6 = None + attn_output_7 = transpose_32.reshape(1, -1, 1024) + transpose_32 = None + output_3 = torch._C._nn.linear( + attn_output_7, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_7 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_76 = torch.nn.functional.dropout(output_3, 0.0, False, False) + output_3 = None + x_77 = x_74 + x_76 + x_74 = x_76 = None + layer_norm_16 = torch.nn.functional.layer_norm( + x_77, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_bias_ = (None) + x_78 = torch._C._nn.linear( + layer_norm_16, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_16 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_79 = torch._C._nn.gelu(x_78) + x_78 = None + x_80 = torch.nn.functional.dropout(x_79, 0.0, False, False) + x_79 = None + x_81 = torch._C._nn.linear( + x_80, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_80 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_82 = torch.nn.functional.dropout(x_81, 0.0, False, False) + x_81 = None + x_83 = x_77 + x_82 + x_77 = x_82 = None + x_84 = torch.nn.functional.layer_norm( + x_83, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_bias_ = (None) + linear_25 = torch._C._nn.linear( + x_84, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_12 = linear_25.view(1, 249, 16, 64) + linear_25 = None + q_4 = view_12.transpose(2, 1) + view_12 = None + linear_26 = torch._C._nn.linear( + x_84, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_13 = linear_26.view(1, 249, 16, 64) + linear_26 = None + k_4 = view_13.transpose(2, 1) + view_13 = None + linear_27 = torch._C._nn.linear( + x_84, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_bias_, + ) + x_84 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_14 = linear_27.view(1, 249, 16, 64) + linear_27 = None + v_4 = view_14.transpose(2, 1) + view_14 = None + attn_output_8 = torch._C._nn.scaled_dot_product_attention( + q_4, k_4, v_4, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_4 = 
k_4 = v_4 = None + transpose_36 = attn_output_8.transpose(1, 2) + attn_output_8 = None + attn_output_9 = transpose_36.reshape(1, -1, 1024) + transpose_36 = None + output_4 = torch._C._nn.linear( + attn_output_9, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_9 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_85 = torch.nn.functional.dropout(output_4, 0.0, False, False) + output_4 = None + x_86 = x_83 + x_85 + x_83 = x_85 = None + layer_norm_18 = torch.nn.functional.layer_norm( + x_86, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_bias_ = (None) + x_87 = torch._C._nn.linear( + layer_norm_18, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_18 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_88 = torch._C._nn.gelu(x_87) + x_87 = None + x_89 = torch.nn.functional.dropout(x_88, 0.0, False, False) + x_88 = None + x_90 = torch._C._nn.linear( + x_89, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_89 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_91 = torch.nn.functional.dropout(x_90, 0.0, False, False) + x_90 = None + x_92 = x_86 + x_91 + x_86 = x_91 = None + x_93 = torch.nn.functional.layer_norm( + x_92, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_weight_ = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_bias_ = (None) + linear_31 = torch._C._nn.linear( + x_93, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_15 = linear_31.view(1, 249, 16, 64) + linear_31 = None + q_5 = view_15.transpose(2, 1) + view_15 = None + linear_32 = torch._C._nn.linear( + x_93, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_16 = linear_32.view(1, 249, 16, 64) + linear_32 = None + k_5 = view_16.transpose(2, 1) + view_16 = None + linear_33 = torch._C._nn.linear( + x_93, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_bias_, + ) + x_93 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_17 = linear_33.view(1, 249, 16, 64) + linear_33 = None + v_5 = view_17.transpose(2, 1) + view_17 = None + attn_output_10 = torch._C._nn.scaled_dot_product_attention( + q_5, k_5, v_5, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_5 = k_5 = v_5 = None + transpose_40 = attn_output_10.transpose(1, 2) + attn_output_10 = None + attn_output_11 = transpose_40.reshape(1, -1, 1024) + transpose_40 = None + output_5 = torch._C._nn.linear( + attn_output_11, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_11 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_94 = torch.nn.functional.dropout(output_5, 0.0, False, False) + output_5 = None + x_95 = x_92 + x_94 + x_92 = x_94 = None + layer_norm_20 = torch.nn.functional.layer_norm( + x_95, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_weight_, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_bias_ = (None) + x_96 = torch._C._nn.linear( + layer_norm_20, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_20 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_97 = torch._C._nn.gelu(x_96) + x_96 = None + x_98 = torch.nn.functional.dropout(x_97, 0.0, False, False) + x_97 = None + x_99 = torch._C._nn.linear( + x_98, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_98 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_100 = torch.nn.functional.dropout(x_99, 0.0, False, False) + x_99 = None + x_101 = x_95 + x_100 + x_95 = x_100 = None + x_102 = torch.nn.functional.layer_norm( + x_101, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_bias_ = (None) + linear_37 = torch._C._nn.linear( + x_102, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_18 = linear_37.view(1, 249, 16, 64) + linear_37 = None + q_6 = view_18.transpose(2, 1) + view_18 = None + linear_38 = torch._C._nn.linear( + x_102, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_weight_, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_19 = linear_38.view(1, 249, 16, 64) + linear_38 = None + k_6 = view_19.transpose(2, 1) + view_19 = None + linear_39 = torch._C._nn.linear( + x_102, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_bias_, + ) + x_102 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_20 = linear_39.view(1, 249, 16, 64) + linear_39 = None + v_6 = view_20.transpose(2, 1) + view_20 = None + attn_output_12 = torch._C._nn.scaled_dot_product_attention( + q_6, k_6, v_6, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_6 = k_6 = v_6 = None + transpose_44 = attn_output_12.transpose(1, 2) + attn_output_12 = None + attn_output_13 = transpose_44.reshape(1, -1, 1024) + transpose_44 = None + output_6 = torch._C._nn.linear( + attn_output_13, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_13 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_103 = torch.nn.functional.dropout(output_6, 0.0, False, False) + output_6 = None + x_104 = x_101 + x_103 + x_101 = x_103 = None + layer_norm_22 = torch.nn.functional.layer_norm( + x_104, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_bias_ = (None) + x_105 = torch._C._nn.linear( + layer_norm_22, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_22 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_106 = torch._C._nn.gelu(x_105) + x_105 = None + x_107 = torch.nn.functional.dropout(x_106, 0.0, False, False) + x_106 = None + x_108 = torch._C._nn.linear( + x_107, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_107 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_109 = torch.nn.functional.dropout(x_108, 0.0, False, False) + x_108 = None + x_110 = x_104 + x_109 + x_104 = x_109 = None + x_111 = torch.nn.functional.layer_norm( + x_110, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_bias_ = (None) + linear_43 = torch._C._nn.linear( + x_111, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_21 = linear_43.view(1, 249, 16, 64) + linear_43 = None + q_7 = view_21.transpose(2, 1) + view_21 = None + linear_44 = torch._C._nn.linear( + x_111, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_22 = linear_44.view(1, 249, 16, 64) + linear_44 = None + k_7 = view_22.transpose(2, 1) + view_22 = None + linear_45 = torch._C._nn.linear( + x_111, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_bias_, + ) + x_111 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_weight_ = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_23 = linear_45.view(1, 249, 16, 64) + linear_45 = None + v_7 = view_23.transpose(2, 1) + view_23 = None + attn_output_14 = torch._C._nn.scaled_dot_product_attention( + q_7, k_7, v_7, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_7 = k_7 = v_7 = None + transpose_48 = attn_output_14.transpose(1, 2) + attn_output_14 = None + attn_output_15 = transpose_48.reshape(1, -1, 1024) + transpose_48 = None + output_7 = torch._C._nn.linear( + attn_output_15, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_15 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_112 = torch.nn.functional.dropout(output_7, 0.0, False, False) + output_7 = None + x_113 = x_110 + x_112 + x_110 = x_112 = None + layer_norm_24 = torch.nn.functional.layer_norm( + x_113, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_bias_ = (None) + x_114 = torch._C._nn.linear( + layer_norm_24, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_24 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_115 = torch._C._nn.gelu(x_114) + x_114 = None + x_116 = torch.nn.functional.dropout(x_115, 0.0, False, False) + x_115 = None + x_117 = torch._C._nn.linear( + x_116, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_116 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_118 = torch.nn.functional.dropout(x_117, 0.0, False, False) + x_117 = None + x_119 = x_113 + x_118 + x_113 = x_118 = None + x_120 = torch.nn.functional.layer_norm( + x_119, + (1024,), 
+ l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_bias_ = (None) + linear_49 = torch._C._nn.linear( + x_120, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_24 = linear_49.view(1, 249, 16, 64) + linear_49 = None + q_8 = view_24.transpose(2, 1) + view_24 = None + linear_50 = torch._C._nn.linear( + x_120, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_25 = linear_50.view(1, 249, 16, 64) + linear_50 = None + k_8 = view_25.transpose(2, 1) + view_25 = None + linear_51 = torch._C._nn.linear( + x_120, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_bias_, + ) + x_120 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_26 = linear_51.view(1, 249, 16, 64) + linear_51 = None + v_8 = view_26.transpose(2, 1) + view_26 = None + attn_output_16 = torch._C._nn.scaled_dot_product_attention( + q_8, k_8, v_8, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_8 = k_8 = v_8 = None + transpose_52 = attn_output_16.transpose(1, 2) + attn_output_16 = None + attn_output_17 = transpose_52.reshape(1, -1, 1024) + transpose_52 = None + output_8 = torch._C._nn.linear( + attn_output_17, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_17 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_weight_ = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_121 = torch.nn.functional.dropout(output_8, 0.0, False, False) + output_8 = None + x_122 = x_119 + x_121 + x_119 = x_121 = None + layer_norm_26 = torch.nn.functional.layer_norm( + x_122, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_bias_ = (None) + x_123 = torch._C._nn.linear( + layer_norm_26, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_26 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_124 = torch._C._nn.gelu(x_123) + x_123 = None + x_125 = torch.nn.functional.dropout(x_124, 0.0, False, False) + x_124 = None + x_126 = torch._C._nn.linear( + x_125, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_125 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_127 = torch.nn.functional.dropout(x_126, 0.0, False, False) + x_126 = None + x_128 = x_122 + x_127 + x_122 = x_127 = None + x_129 = torch.nn.functional.layer_norm( + x_128, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_bias_ = (None) + linear_55 = torch._C._nn.linear( + x_129, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_weight_ = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_27 = linear_55.view(1, 249, 16, 64) + linear_55 = None + q_9 = view_27.transpose(2, 1) + view_27 = None + linear_56 = torch._C._nn.linear( + x_129, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_28 = linear_56.view(1, 249, 16, 64) + linear_56 = None + k_9 = view_28.transpose(2, 1) + view_28 = None + linear_57 = torch._C._nn.linear( + x_129, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_bias_, + ) + x_129 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_29 = linear_57.view(1, 249, 16, 64) + linear_57 = None + v_9 = view_29.transpose(2, 1) + view_29 = None + attn_output_18 = torch._C._nn.scaled_dot_product_attention( + q_9, k_9, v_9, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_9 = k_9 = v_9 = None + transpose_56 = attn_output_18.transpose(1, 2) + attn_output_18 = None + attn_output_19 = transpose_56.reshape(1, -1, 1024) + transpose_56 = None + output_9 = torch._C._nn.linear( + attn_output_19, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_19 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_130 = torch.nn.functional.dropout(output_9, 0.0, False, False) + output_9 = None + x_131 = x_128 + x_130 + x_128 = x_130 = None + layer_norm_28 = torch.nn.functional.layer_norm( + x_131, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_bias_ = (None) + x_132 = torch._C._nn.linear( + layer_norm_28, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_28 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_133 = torch._C._nn.gelu(x_132) + x_132 = None + x_134 = torch.nn.functional.dropout(x_133, 0.0, False, False) + x_133 = None + x_135 = torch._C._nn.linear( + x_134, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_134 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_136 = torch.nn.functional.dropout(x_135, 0.0, False, False) + x_135 = None + x_137 = x_131 + x_136 + x_131 = x_136 = None + x_138 = torch.nn.functional.layer_norm( + x_137, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_bias_ = (None) + linear_61 = torch._C._nn.linear( + x_138, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_30 = linear_61.view(1, 249, 16, 64) + linear_61 = None + q_10 = view_30.transpose(2, 1) + view_30 = None + linear_62 = torch._C._nn.linear( + x_138, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_31 = linear_62.view(1, 249, 16, 64) + linear_62 = None + k_10 = view_31.transpose(2, 1) + view_31 = None + linear_63 = torch._C._nn.linear( + x_138, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_bias_, + ) + x_138 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_32 = linear_63.view(1, 249, 16, 64) + linear_63 = None + v_10 = view_32.transpose(2, 1) + view_32 = None + attn_output_20 = torch._C._nn.scaled_dot_product_attention( + q_10, k_10, v_10, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_10 = k_10 = v_10 = None + transpose_60 = attn_output_20.transpose(1, 2) + attn_output_20 = None + attn_output_21 = transpose_60.reshape(1, -1, 1024) + transpose_60 = None + output_10 = torch._C._nn.linear( + attn_output_21, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_21 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_139 = torch.nn.functional.dropout(output_10, 0.0, False, False) + output_10 = None + x_140 = x_137 + x_139 + x_137 = x_139 = None + layer_norm_30 = torch.nn.functional.layer_norm( + x_140, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_bias_ = (None) + x_141 = torch._C._nn.linear( + layer_norm_30, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_30 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_142 = torch._C._nn.gelu(x_141) + x_141 = None + x_143 = torch.nn.functional.dropout(x_142, 0.0, False, False) + x_142 = None + x_144 = torch._C._nn.linear( + x_143, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_143 = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_145 = torch.nn.functional.dropout(x_144, 0.0, False, False) + x_144 = None + x_146 = x_140 + x_145 + x_140 = x_145 = None + x_147 = torch.nn.functional.layer_norm( + x_146, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_bias_ = (None) + linear_67 = torch._C._nn.linear( + x_147, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_33 = linear_67.view(1, 249, 16, 64) + linear_67 = None + q_11 = view_33.transpose(2, 1) + view_33 = None + linear_68 = torch._C._nn.linear( + x_147, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_34 = linear_68.view(1, 249, 16, 64) + linear_68 = None + k_11 = view_34.transpose(2, 1) + view_34 = None + linear_69 = torch._C._nn.linear( + x_147, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_bias_, + ) + x_147 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_35 = linear_69.view(1, 249, 16, 64) + linear_69 = None + v_11 = view_35.transpose(2, 1) + view_35 = None + attn_output_22 = torch._C._nn.scaled_dot_product_attention( + q_11, k_11, v_11, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_11 = k_11 = v_11 = None + transpose_64 = attn_output_22.transpose(1, 2) + attn_output_22 = None + attn_output_23 = transpose_64.reshape(1, -1, 1024) + transpose_64 = None + output_11 = torch._C._nn.linear( + attn_output_23, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_23 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_148 = torch.nn.functional.dropout(output_11, 0.0, False, False) + output_11 = None + x_149 = x_146 + x_148 + x_146 = x_148 = None + layer_norm_32 = torch.nn.functional.layer_norm( + x_149, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_bias_ = (None) + x_150 = torch._C._nn.linear( + layer_norm_32, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_32 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_151 = torch._C._nn.gelu(x_150) + x_150 = None + x_152 = torch.nn.functional.dropout(x_151, 0.0, False, False) + x_151 = None + x_153 = torch._C._nn.linear( + x_152, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_152 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_154 = torch.nn.functional.dropout(x_153, 0.0, False, False) + x_153 = None + x_155 = x_149 + x_154 + x_149 = x_154 = None + x_156 = torch.nn.functional.layer_norm( + x_155, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_bias_ = (None) + linear_73 = torch._C._nn.linear( + x_156, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_36 = linear_73.view(1, 249, 16, 64) + linear_73 = None + q_12 = view_36.transpose(2, 1) + view_36 = None + linear_74 = torch._C._nn.linear( + x_156, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_37 = linear_74.view(1, 249, 16, 64) + linear_74 = None + k_12 = view_37.transpose(2, 1) + view_37 = None + linear_75 = torch._C._nn.linear( + x_156, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_bias_, + ) + x_156 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_38 = linear_75.view(1, 249, 16, 64) + linear_75 = None + v_12 = view_38.transpose(2, 1) + view_38 = None + attn_output_24 = torch._C._nn.scaled_dot_product_attention( + q_12, k_12, v_12, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_12 = k_12 = v_12 = None + transpose_68 = attn_output_24.transpose(1, 2) + attn_output_24 = None + attn_output_25 = transpose_68.reshape(1, -1, 1024) + transpose_68 = None + output_12 = torch._C._nn.linear( + attn_output_25, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_25 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_157 = torch.nn.functional.dropout(output_12, 0.0, False, False) + output_12 = None + x_158 = x_155 + x_157 + x_155 = x_157 = None + layer_norm_34 = torch.nn.functional.layer_norm( + x_158, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + 
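+ # feed-forward block of encoder layer 12: intermediate dense -> GELU -> output dense, then a residual add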
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_bias_ = (None) + x_159 = torch._C._nn.linear( + layer_norm_34, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_34 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_160 = torch._C._nn.gelu(x_159) + x_159 = None + x_161 = torch.nn.functional.dropout(x_160, 0.0, False, False) + x_160 = None + x_162 = torch._C._nn.linear( + x_161, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_161 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_163 = torch.nn.functional.dropout(x_162, 0.0, False, False) + x_162 = None + x_164 = x_158 + x_163 + x_158 = x_163 = None + x_165 = torch.nn.functional.layer_norm( + x_164, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_bias_ = (None) + linear_79 = torch._C._nn.linear( + x_165, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_39 = linear_79.view(1, 249, 16, 64) + linear_79 = None + q_13 = view_39.transpose(2, 1) + view_39 = None + linear_80 = torch._C._nn.linear( + x_165, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_bias_, + ) + 
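+ # reshape the layer-13 key projection to (batch=1, heads=16, seq=249, head_dim=64) via view + transpose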
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_40 = linear_80.view(1, 249, 16, 64) + linear_80 = None + k_13 = view_40.transpose(2, 1) + view_40 = None + linear_81 = torch._C._nn.linear( + x_165, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_bias_, + ) + x_165 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_41 = linear_81.view(1, 249, 16, 64) + linear_81 = None + v_13 = view_41.transpose(2, 1) + view_41 = None + attn_output_26 = torch._C._nn.scaled_dot_product_attention( + q_13, k_13, v_13, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_13 = k_13 = v_13 = None + transpose_72 = attn_output_26.transpose(1, 2) + attn_output_26 = None + attn_output_27 = transpose_72.reshape(1, -1, 1024) + transpose_72 = None + output_13 = torch._C._nn.linear( + attn_output_27, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_27 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_166 = torch.nn.functional.dropout(output_13, 0.0, False, False) + output_13 = None + x_167 = x_164 + x_166 + x_164 = x_166 = None + layer_norm_36 = torch.nn.functional.layer_norm( + x_167, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_bias_ = (None) + x_168 = torch._C._nn.linear( + layer_norm_36, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_36 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_169 = torch._C._nn.gelu(x_168) + x_168 = None + x_170 = 
torch.nn.functional.dropout(x_169, 0.0, False, False) + x_169 = None + x_171 = torch._C._nn.linear( + x_170, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_170 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_172 = torch.nn.functional.dropout(x_171, 0.0, False, False) + x_171 = None + x_173 = x_167 + x_172 + x_167 = x_172 = None + x_174 = torch.nn.functional.layer_norm( + x_173, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_bias_ = (None) + linear_85 = torch._C._nn.linear( + x_174, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_42 = linear_85.view(1, 249, 16, 64) + linear_85 = None + q_14 = view_42.transpose(2, 1) + view_42 = None + linear_86 = torch._C._nn.linear( + x_174, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_43 = linear_86.view(1, 249, 16, 64) + linear_86 = None + k_14 = view_43.transpose(2, 1) + view_43 = None + linear_87 = torch._C._nn.linear( + x_174, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_bias_, + ) + x_174 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_44 = linear_87.view(1, 249, 16, 64) + linear_87 = None + v_14 = view_44.transpose(2, 1) + 
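+ # layer-14 self-attention: fused scaled_dot_product_attention over 16 heads (dropout_p=0.0, non-causal)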
view_44 = None + attn_output_28 = torch._C._nn.scaled_dot_product_attention( + q_14, k_14, v_14, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_14 = k_14 = v_14 = None + transpose_76 = attn_output_28.transpose(1, 2) + attn_output_28 = None + attn_output_29 = transpose_76.reshape(1, -1, 1024) + transpose_76 = None + output_14 = torch._C._nn.linear( + attn_output_29, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_29 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_175 = torch.nn.functional.dropout(output_14, 0.0, False, False) + output_14 = None + x_176 = x_173 + x_175 + x_173 = x_175 = None + layer_norm_38 = torch.nn.functional.layer_norm( + x_176, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_bias_ = (None) + x_177 = torch._C._nn.linear( + layer_norm_38, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_38 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_178 = torch._C._nn.gelu(x_177) + x_177 = None + x_179 = torch.nn.functional.dropout(x_178, 0.0, False, False) + x_178 = None + x_180 = torch._C._nn.linear( + x_179, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_179 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_181 = torch.nn.functional.dropout(x_180, 0.0, False, False) + x_180 = None + x_182 = x_176 + x_181 + x_176 = x_181 = None + x_183 = torch.nn.functional.layer_norm( + x_182, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_weight_, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_bias_ = (None) + linear_91 = torch._C._nn.linear( + x_183, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_45 = linear_91.view(1, 249, 16, 64) + linear_91 = None + q_15 = view_45.transpose(2, 1) + view_45 = None + linear_92 = torch._C._nn.linear( + x_183, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_46 = linear_92.view(1, 249, 16, 64) + linear_92 = None + k_15 = view_46.transpose(2, 1) + view_46 = None + linear_93 = torch._C._nn.linear( + x_183, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_bias_, + ) + x_183 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_47 = linear_93.view(1, 249, 16, 64) + linear_93 = None + v_15 = view_47.transpose(2, 1) + view_47 = None + attn_output_30 = torch._C._nn.scaled_dot_product_attention( + q_15, k_15, v_15, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_15 = k_15 = v_15 = None + transpose_80 = attn_output_30.transpose(1, 2) + attn_output_30 = None + attn_output_31 = transpose_80.reshape(1, -1, 1024) + transpose_80 = None + output_15 = torch._C._nn.linear( + attn_output_31, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_31 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_184 = torch.nn.functional.dropout(output_15, 0.0, False, False) + 
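+ # residual connection around the layer-15 attention block (x_185 = x_182 + x_184)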
output_15 = None + x_185 = x_182 + x_184 + x_182 = x_184 = None + layer_norm_40 = torch.nn.functional.layer_norm( + x_185, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_bias_ = (None) + x_186 = torch._C._nn.linear( + layer_norm_40, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_40 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_187 = torch._C._nn.gelu(x_186) + x_186 = None + x_188 = torch.nn.functional.dropout(x_187, 0.0, False, False) + x_187 = None + x_189 = torch._C._nn.linear( + x_188, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_188 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_190 = torch.nn.functional.dropout(x_189, 0.0, False, False) + x_189 = None + x_191 = x_185 + x_190 + x_185 = x_190 = None + x_192 = torch.nn.functional.layer_norm( + x_191, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_bias_ = (None) + linear_97 = torch._C._nn.linear( + x_192, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_48 = linear_97.view(1, 249, 16, 64) + linear_97 = None + q_16 = view_48.transpose(2, 1) + view_48 = None + linear_98 = 
torch._C._nn.linear( + x_192, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_49 = linear_98.view(1, 249, 16, 64) + linear_98 = None + k_16 = view_49.transpose(2, 1) + view_49 = None + linear_99 = torch._C._nn.linear( + x_192, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_bias_, + ) + x_192 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_50 = linear_99.view(1, 249, 16, 64) + linear_99 = None + v_16 = view_50.transpose(2, 1) + view_50 = None + attn_output_32 = torch._C._nn.scaled_dot_product_attention( + q_16, k_16, v_16, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_16 = k_16 = v_16 = None + transpose_84 = attn_output_32.transpose(1, 2) + attn_output_32 = None + attn_output_33 = transpose_84.reshape(1, -1, 1024) + transpose_84 = None + output_16 = torch._C._nn.linear( + attn_output_33, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_33 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_193 = torch.nn.functional.dropout(output_16, 0.0, False, False) + output_16 = None + x_194 = x_191 + x_193 + x_191 = x_193 = None + layer_norm_42 = torch.nn.functional.layer_norm( + x_194, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_bias_ = (None) + x_195 = torch._C._nn.linear( + layer_norm_42, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_42 = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_196 = torch._C._nn.gelu(x_195) + x_195 = None + x_197 = torch.nn.functional.dropout(x_196, 0.0, False, False) + x_196 = None + x_198 = torch._C._nn.linear( + x_197, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_197 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_199 = torch.nn.functional.dropout(x_198, 0.0, False, False) + x_198 = None + x_200 = x_194 + x_199 + x_194 = x_199 = None + x_201 = torch.nn.functional.layer_norm( + x_200, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_bias_ = (None) + linear_103 = torch._C._nn.linear( + x_201, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_51 = linear_103.view(1, 249, 16, 64) + linear_103 = None + q_17 = view_51.transpose(2, 1) + view_51 = None + linear_104 = torch._C._nn.linear( + x_201, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_52 = linear_104.view(1, 249, 16, 64) + linear_104 = None + k_17 = view_52.transpose(2, 1) + view_52 = None + linear_105 = torch._C._nn.linear( + x_201, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_bias_, + ) + x_201 = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_53 = linear_105.view(1, 249, 16, 64) + linear_105 = None + v_17 = view_53.transpose(2, 1) + view_53 = None + attn_output_34 = torch._C._nn.scaled_dot_product_attention( + q_17, k_17, v_17, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_17 = k_17 = v_17 = None + transpose_88 = attn_output_34.transpose(1, 2) + attn_output_34 = None + attn_output_35 = transpose_88.reshape(1, -1, 1024) + transpose_88 = None + output_17 = torch._C._nn.linear( + attn_output_35, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_35 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_202 = torch.nn.functional.dropout(output_17, 0.0, False, False) + output_17 = None + x_203 = x_200 + x_202 + x_200 = x_202 = None + layer_norm_44 = torch.nn.functional.layer_norm( + x_203, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_bias_ = (None) + x_204 = torch._C._nn.linear( + layer_norm_44, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_44 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_205 = torch._C._nn.gelu(x_204) + x_204 = None + x_206 = torch.nn.functional.dropout(x_205, 0.0, False, False) + x_205 = None + x_207 = torch._C._nn.linear( + x_206, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_206 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_208 = 
torch.nn.functional.dropout(x_207, 0.0, False, False) + x_207 = None + x_209 = x_203 + x_208 + x_203 = x_208 = None + x_210 = torch.nn.functional.layer_norm( + x_209, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_bias_ = (None) + linear_109 = torch._C._nn.linear( + x_210, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_54 = linear_109.view(1, 249, 16, 64) + linear_109 = None + q_18 = view_54.transpose(2, 1) + view_54 = None + linear_110 = torch._C._nn.linear( + x_210, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_55 = linear_110.view(1, 249, 16, 64) + linear_110 = None + k_18 = view_55.transpose(2, 1) + view_55 = None + linear_111 = torch._C._nn.linear( + x_210, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_bias_, + ) + x_210 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_56 = linear_111.view(1, 249, 16, 64) + linear_111 = None + v_18 = view_56.transpose(2, 1) + view_56 = None + attn_output_36 = torch._C._nn.scaled_dot_product_attention( + q_18, k_18, v_18, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_18 = k_18 = v_18 = None + transpose_92 = attn_output_36.transpose(1, 2) + attn_output_36 = None + attn_output_37 = transpose_92.reshape(1, -1, 1024) + transpose_92 = None + output_18 = torch._C._nn.linear( + attn_output_37, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_37 = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_211 = torch.nn.functional.dropout(output_18, 0.0, False, False) + output_18 = None + x_212 = x_209 + x_211 + x_209 = x_211 = None + layer_norm_46 = torch.nn.functional.layer_norm( + x_212, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_bias_ = (None) + x_213 = torch._C._nn.linear( + layer_norm_46, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_46 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_214 = torch._C._nn.gelu(x_213) + x_213 = None + x_215 = torch.nn.functional.dropout(x_214, 0.0, False, False) + x_214 = None + x_216 = torch._C._nn.linear( + x_215, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_215 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_217 = torch.nn.functional.dropout(x_216, 0.0, False, False) + x_216 = None + x_218 = x_212 + x_217 + x_212 = x_217 = None + x_219 = torch.nn.functional.layer_norm( + x_218, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_bias_ = (None) + linear_115 = torch._C._nn.linear( + x_219, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_bias_, + ) + 
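+        # Every encoder layer repeats the pattern above: pre-LayerNorm, Q/K/V
+        # linear projections viewed as (batch=1, sequence=249, heads=16, head_dim=64)
+        # and transposed so the head axis leads, fused scaled_dot_product_attention,
+        # an output projection with a residual add, then a GELU feed-forward
+        # block with its own residual.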
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_57 = linear_115.view(1, 249, 16, 64) + linear_115 = None + q_19 = view_57.transpose(2, 1) + view_57 = None + linear_116 = torch._C._nn.linear( + x_219, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_58 = linear_116.view(1, 249, 16, 64) + linear_116 = None + k_19 = view_58.transpose(2, 1) + view_58 = None + linear_117 = torch._C._nn.linear( + x_219, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_bias_, + ) + x_219 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_59 = linear_117.view(1, 249, 16, 64) + linear_117 = None + v_19 = view_59.transpose(2, 1) + view_59 = None + attn_output_38 = torch._C._nn.scaled_dot_product_attention( + q_19, k_19, v_19, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_19 = k_19 = v_19 = None + transpose_96 = attn_output_38.transpose(1, 2) + attn_output_38 = None + attn_output_39 = transpose_96.reshape(1, -1, 1024) + transpose_96 = None + output_19 = torch._C._nn.linear( + attn_output_39, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_39 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_220 = torch.nn.functional.dropout(output_19, 0.0, False, False) + output_19 = None + x_221 = x_218 + x_220 + x_218 = x_220 = None + layer_norm_48 = torch.nn.functional.layer_norm( + x_221, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_bias_ = (None) + x_222 = torch._C._nn.linear( + 
layer_norm_48, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_48 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_223 = torch._C._nn.gelu(x_222) + x_222 = None + x_224 = torch.nn.functional.dropout(x_223, 0.0, False, False) + x_223 = None + x_225 = torch._C._nn.linear( + x_224, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_224 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_226 = torch.nn.functional.dropout(x_225, 0.0, False, False) + x_225 = None + x_227 = x_221 + x_226 + x_221 = x_226 = None + x_228 = torch.nn.functional.layer_norm( + x_227, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_bias_ = (None) + linear_121 = torch._C._nn.linear( + x_228, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_60 = linear_121.view(1, 249, 16, 64) + linear_121 = None + q_20 = view_60.transpose(2, 1) + view_60 = None + linear_122 = torch._C._nn.linear( + x_228, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_61 = linear_122.view(1, 249, 16, 64) + linear_122 = None + k_20 = view_61.transpose(2, 1) + view_61 = 
None + linear_123 = torch._C._nn.linear( + x_228, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_bias_, + ) + x_228 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_62 = linear_123.view(1, 249, 16, 64) + linear_123 = None + v_20 = view_62.transpose(2, 1) + view_62 = None + attn_output_40 = torch._C._nn.scaled_dot_product_attention( + q_20, k_20, v_20, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_20 = k_20 = v_20 = None + transpose_100 = attn_output_40.transpose(1, 2) + attn_output_40 = None + attn_output_41 = transpose_100.reshape(1, -1, 1024) + transpose_100 = None + output_20 = torch._C._nn.linear( + attn_output_41, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_41 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_229 = torch.nn.functional.dropout(output_20, 0.0, False, False) + output_20 = None + x_230 = x_227 + x_229 + x_227 = x_229 = None + layer_norm_50 = torch.nn.functional.layer_norm( + x_230, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_bias_ = (None) + x_231 = torch._C._nn.linear( + layer_norm_50, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_50 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_232 = torch._C._nn.gelu(x_231) + x_231 = None + x_233 = torch.nn.functional.dropout(x_232, 0.0, False, False) + x_232 = None + x_234 = torch._C._nn.linear( + x_233, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_weight_, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_233 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_235 = torch.nn.functional.dropout(x_234, 0.0, False, False) + x_234 = None + x_236 = x_230 + x_235 + x_230 = x_235 = None + x_237 = torch.nn.functional.layer_norm( + x_236, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_bias_ = (None) + linear_127 = torch._C._nn.linear( + x_237, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_63 = linear_127.view(1, 249, 16, 64) + linear_127 = None + q_21 = view_63.transpose(2, 1) + view_63 = None + linear_128 = torch._C._nn.linear( + x_237, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_64 = linear_128.view(1, 249, 16, 64) + linear_128 = None + k_21 = view_64.transpose(2, 1) + view_64 = None + linear_129 = torch._C._nn.linear( + x_237, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_bias_, + ) + x_237 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_65 = linear_129.view(1, 249, 16, 64) + linear_129 = None + v_21 = view_65.transpose(2, 1) + view_65 = None + attn_output_42 = torch._C._nn.scaled_dot_product_attention( + q_21, k_21, v_21, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_21 = k_21 = v_21 = None + transpose_104 = attn_output_42.transpose(1, 2) + attn_output_42 = None + 
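+        # Rebinding dead intermediates and consumed parameters to None drops
+        # their last Python reference so the backing storage can be reclaimed
+        # right away, keeping peak memory close to the eager module's.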
attn_output_43 = transpose_104.reshape(1, -1, 1024) + transpose_104 = None + output_21 = torch._C._nn.linear( + attn_output_43, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_43 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_238 = torch.nn.functional.dropout(output_21, 0.0, False, False) + output_21 = None + x_239 = x_236 + x_238 + x_236 = x_238 = None + layer_norm_52 = torch.nn.functional.layer_norm( + x_239, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_bias_ = (None) + x_240 = torch._C._nn.linear( + layer_norm_52, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_52 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_241 = torch._C._nn.gelu(x_240) + x_240 = None + x_242 = torch.nn.functional.dropout(x_241, 0.0, False, False) + x_241 = None + x_243 = torch._C._nn.linear( + x_242, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_242 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_244 = torch.nn.functional.dropout(x_243, 0.0, False, False) + x_243 = None + x_245 = x_239 + x_244 + x_239 = x_244 = None + x_246 = torch.nn.functional.layer_norm( + x_245, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_weight_ = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_bias_ = (None) + linear_133 = torch._C._nn.linear( + x_246, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_66 = linear_133.view(1, 249, 16, 64) + linear_133 = None + q_22 = view_66.transpose(2, 1) + view_66 = None + linear_134 = torch._C._nn.linear( + x_246, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_67 = linear_134.view(1, 249, 16, 64) + linear_134 = None + k_22 = view_67.transpose(2, 1) + view_67 = None + linear_135 = torch._C._nn.linear( + x_246, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_bias_, + ) + x_246 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_68 = linear_135.view(1, 249, 16, 64) + linear_135 = None + v_22 = view_68.transpose(2, 1) + view_68 = None + attn_output_44 = torch._C._nn.scaled_dot_product_attention( + q_22, k_22, v_22, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_22 = k_22 = v_22 = None + transpose_108 = attn_output_44.transpose(1, 2) + attn_output_44 = None + attn_output_45 = transpose_108.reshape(1, -1, 1024) + transpose_108 = None + output_22 = torch._C._nn.linear( + attn_output_45, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_45 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_247 = torch.nn.functional.dropout(output_22, 0.0, False, False) + output_22 = None + x_248 = x_245 + x_247 + x_245 = x_247 = None + layer_norm_54 = torch.nn.functional.layer_norm( + x_248, + (1024,), + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_bias_ = (None) + x_249 = torch._C._nn.linear( + layer_norm_54, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_54 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_250 = torch._C._nn.gelu(x_249) + x_249 = None + x_251 = torch.nn.functional.dropout(x_250, 0.0, False, False) + x_250 = None + x_252 = torch._C._nn.linear( + x_251, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_251 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_253 = torch.nn.functional.dropout(x_252, 0.0, False, False) + x_252 = None + x_254 = x_248 + x_253 + x_248 = x_253 = None + x_255 = torch.nn.functional.layer_norm( + x_254, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_bias_ = (None) + linear_139 = torch._C._nn.linear( + x_255, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_bias_ = (None) + view_69 = linear_139.view(1, 249, 16, 64) + linear_139 = None + q_23 = view_69.transpose(2, 1) + view_69 = None + linear_140 = torch._C._nn.linear( + x_255, + 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_bias_, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_bias_ = (None) + view_70 = linear_140.view(1, 249, 16, 64) + linear_140 = None + k_23 = view_70.transpose(2, 1) + view_70 = None + linear_141 = torch._C._nn.linear( + x_255, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_bias_, + ) + x_255 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_bias_ = (None) + view_71 = linear_141.view(1, 249, 16, 64) + linear_141 = None + v_23 = view_71.transpose(2, 1) + view_71 = None + attn_output_46 = torch._C._nn.scaled_dot_product_attention( + q_23, k_23, v_23, attn_mask=None, dropout_p=0.0, is_causal=False + ) + q_23 = k_23 = v_23 = None + transpose_112 = attn_output_46.transpose(1, 2) + attn_output_46 = None + attn_output_47 = transpose_112.reshape(1, -1, 1024) + transpose_112 = None + output_23 = torch._C._nn.linear( + attn_output_47, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_bias_, + ) + attn_output_47 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_bias_ = (None) + x_256 = torch.nn.functional.dropout(output_23, 0.0, False, False) + output_23 = None + x_257 = x_254 + x_256 + x_254 = x_256 = None + layer_norm_56 = torch.nn.functional.layer_norm( + x_257, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_bias_, + 1e-05, + ) + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_bias_ = (None) + x_258 = torch._C._nn.linear( + layer_norm_56, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_bias_, + ) + layer_norm_56 = 
l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_bias_ = (None) + x_259 = torch._C._nn.gelu(x_258) + x_258 = None + x_260 = torch.nn.functional.dropout(x_259, 0.0, False, False) + x_259 = None + x_261 = torch._C._nn.linear( + x_260, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_bias_, + ) + x_260 = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_bias_ = (None) + x_262 = torch.nn.functional.dropout(x_261, 0.0, False, False) + x_261 = None + x_263 = x_257 + x_262 + x_257 = x_262 = None + x_264 = torch.nn.functional.layer_norm( + x_263, + (1024,), + l_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_weight_, + l_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_bias_, + 1e-05, + ) + x_263 = l_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_weight_ = l_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_bias_ = (None) + return (x_264,) diff --git a/samples/torchaudio/hubert_large/weight_meta.py b/samples/torchaudio/hubert_large/weight_meta.py new file mode 100644 index 000000000..dfb6ce47b --- /dev/null +++ b/samples/torchaudio/hubert_large/weight_meta.py @@ -0,0 +1,4148 @@ +class Program_weight_tensor_meta_L_waveforms_: + name = "L_waveforms_" + shape = [1, 80000] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.003 + std = 1.000 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_conv_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_conv_parameters_weight_" + shape = [512, 1, 10] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.025 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_weight_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.266 + std = 0.121 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_0_modules_layer_norm_parameters_bias_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.041 + std = 0.108 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_conv_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_conv_parameters_weight_" + shape = 
[512, 512, 3] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.026 + std = 0.091 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_weight_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.709 + std = 0.570 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_1_modules_layer_norm_parameters_bias_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.101 + std = 0.140 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_conv_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_conv_parameters_weight_" + shape = [512, 512, 3] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.003 + std = 0.146 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_weight_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.798 + std = 0.393 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_2_modules_layer_norm_parameters_bias_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.192 + std = 0.170 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_conv_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_conv_parameters_weight_" + shape = [512, 512, 3] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.014 + std = 0.164 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_weight_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.634 + std = 0.303 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_3_modules_layer_norm_parameters_bias_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.186 + std = 0.179 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_conv_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_conv_parameters_weight_" + shape = [512, 512, 3] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.007 + std = 0.160 + data = None + + 
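+
+# --- Illustrative sketch (an assumption, not part of the captured metadata) ---
+# One plausible way a harness could rehydrate a stand-in tensor from a meta
+# record: resolve the dtype string, fall back to CPU when CUDA is unavailable,
+# and, since `data` is None for every record here, sample noise matching the
+# recorded mean/std. The `_materialize` helper is hypothetical; only
+# shape/dtype/device/mean/std/data come from the classes themselves.
+import torch
+
+def _materialize(meta):
+    dtype = getattr(torch, meta.dtype.rsplit(".", 1)[-1])  # "torch.float32" -> torch.float32
+    device = meta.device if torch.cuda.is_available() else "cpu"
+    if meta.data is not None:
+        return torch.tensor(meta.data, dtype=dtype, device=device)
+    return torch.randn(*meta.shape, dtype=dtype, device=device) * meta.std + meta.mean
+
+# e.g. _materialize(Program_weight_tensor_meta_L_waveforms_) -> [1, 80000] float32 noise
+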
+class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_weight_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.532 + std = 0.252 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_4_modules_layer_norm_parameters_bias_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.140 + std = 0.194 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_conv_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_conv_parameters_weight_" + shape = [512, 512, 2] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.006 + std = 0.161 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_weight_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.412 + std = 0.230 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_5_modules_layer_norm_parameters_bias_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.132 + std = 0.233 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_conv_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_conv_parameters_weight_" + shape = [512, 512, 2] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.148 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_weight_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.094 + std = 0.064 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_feature_extractor_modules_conv_layers_modules_6_modules_layer_norm_parameters_bias_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.010 + std = 0.059 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_weight_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.268 + std = 0.148 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_feature_projection_modules_layer_norm_parameters_bias_" + shape = [512] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.044 + std = 0.091 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_weight_" + shape = [1024, 512] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.006 + std = 0.142 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_feature_projection_modules_projection_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.022 + std = 0.172 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original0_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original0_" + shape = [1, 1, 128] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.307 + std = 0.680 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original1_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_modules_parametrizations_modules_weight_parameters_original1_" + shape = [1024, 64, 128] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.005 + std = 0.067 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_pos_conv_embed_modules_conv_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.023 + std = 0.305 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.143 + std = 0.108 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.008 + std = 0.042 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = 
"cuda:0" + mean = -0.000 + std = 0.081 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.003 + std = 0.333 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.079 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.002 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.072 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.020 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.089 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.011 + std = 0.107 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.159 + std = 0.087 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.009
+    std = 0.045
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.002
+    std = 0.116
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.085
+    std = 0.056
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.106
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_0_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.086
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.187
+    std = 0.070
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.023
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.104
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.018
+    std = 0.395
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.106
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.002
+    std = 0.096
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.102
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.031
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.106
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.001
+    std = 0.094
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.186
+    std = 0.074
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.006
+    std = 0.040
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.005
+    std = 0.122
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.055
+    std = 0.048
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.107
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_1_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.078
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.175
+    std = 0.052
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.021
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.105
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.008
+    std = 0.404
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.106
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.062
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.110
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.037
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.108
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.071
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.211
+    std = 0.073
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.008
+    std = 0.060
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.128
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.023
+    std = 0.029
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.120
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_2_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.068
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.186
+    std = 0.039
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.030
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.116
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.003
+    std = 0.363
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.115
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.084
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.101
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.001
+    std = 0.038
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.099
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.066
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.208
+    std = 0.061
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.007
+    std = 0.057
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.128
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.031
+    std = 0.036
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.119
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_3_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.066
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.187
+    std = 0.041
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.003
+    std = 0.027
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.115
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.008
+    std = 0.327
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.115
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.003
+    std = 0.092
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.104
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.029
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.097
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.062
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.212
+    std = 0.054
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.007
+    std = 0.054
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.130
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.033
+    std = 0.035
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.120
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_4_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.061
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.196
+    std = 0.038
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.003
+    std = 0.032
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.117
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.004
+    std = 0.322
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.116
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.002
+    std = 0.112
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.111
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.028
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.106
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.061
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.214
+    std = 0.049
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.009
+    std = 0.057
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.131
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.039
+    std = 0.032
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.125
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_5_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.058
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.205
+    std = 0.042
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.002
+    std = 0.033
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.121
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.007
+    std = 0.292
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.121
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.001
+    std = 0.014
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.106
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.029
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.098
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.063
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.207
+    std = 0.043
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.009
+    std = 0.048
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.132
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.058
+    std = 0.032
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.123
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_6_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.053
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.206
+    std = 0.039
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.002
+    std = 0.035
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.118
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.003
+    std = 0.286
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.118
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.006
+    std = 0.163
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.115
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.001
+    std = 0.026
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.106
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.055
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.211
+    std = 0.038
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.011
+    std = 0.046
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.005
+    std = 0.131
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.061
+    std = 0.033
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.123
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_7_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.047
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.216
+    std = 0.043
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.002
+    std = 0.036
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.120
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.007
+    std = 0.284
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.121
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.112
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.119
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.034
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.110
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.056
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.219
+    std = 0.042
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.010
+    std = 0.047
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.005
+    std = 0.131
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.056
+    std = 0.032
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.122
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_8_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.001
+    std = 0.048
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.208
+    std = 0.038
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.001
+    std = 0.035
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.119
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.014
+    std = 0.291
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.119
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.002
+    std = 0.217
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.120
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.037
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.115
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.055
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.222
+    std = 0.045
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.008
+    std = 0.046
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.130
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.052
+    std = 0.032
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.124
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_9_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.043
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.224
+    std = 0.041
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.030
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.116
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.004
+    std = 0.262
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.117
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_k_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.001
+    std = 0.108
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.120
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_v_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.002
+    std = 0.033
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.000
+    std = 0.114
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_attention_modules_out_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.049
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.226
+    std = 0.045
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_final_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.005
+    std = 0.049
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_weight_"
+    shape = [4096, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.002
+    std = 0.130
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_intermediate_dense_parameters_bias_"
+    shape = [4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.054
+    std = 0.037
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_weight_"
+    shape = [1024, 4096]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.126
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_10_modules_feed_forward_modules_output_dense_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.042
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_weight_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.223
+    std = 0.041
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_layer_norm_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.001
+    std = 0.026
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.115
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_bias_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_q_proj_parameters_bias_"
+    shape = [1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = -0.004
+    std = 0.275
+    data = None
+
+
+class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_weight_:
+    name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_weight_"
+    shape = [1024, 1024]
+    dtype = "torch.float32"
+    device = "cuda:0"
+    mean = 0.000
+    std = 0.114
+    data = None
+
+
+class
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.067 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.114 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.041 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.111 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.051 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.234 + std = 0.046 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.005 + std = 0.051 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.002 + std = 0.131 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.051 + std = 0.036 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.127 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_11_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.041 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.226 + std = 0.036 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.026 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.122 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.280 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.122 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.085 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.120 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.003 + std = 0.043 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.118 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.058 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.240 + std = 0.046 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.003 + std = 0.050 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.132 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.058 + std = 0.044 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.130 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_12_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.043 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.236 + std = 0.040 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.025 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.122 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.009 + std = 0.276 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.123 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.004 + std = 0.112 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.123 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.035 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.120 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.055 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.234 + std = 0.044 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.052 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.132 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.069 + std = 0.055 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.131 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_13_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.053 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.231 + std = 0.037 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.036 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.124 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.015 + std = 0.266 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.124 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.004 + std = 0.112 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.125 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.004 + std = 0.055 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.123 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.083 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.241 + std = 0.051 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.049 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.132 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.064 + std = 0.048 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.135 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_14_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.065 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.245 + std = 0.045 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.035 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.128 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.303 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.129 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.011 + std = 0.242 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.131 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.068 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.131 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.108 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.345 + std = 0.081 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.091 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.138 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.034 + std = 0.047 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.140 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_15_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.118 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.244 + std = 0.044 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.041 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.124 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.304 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.125 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.236 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.136 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.057 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.135 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.128 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.353 + std = 0.080 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.003 + std = 0.086 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.139 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.035 + std = 0.052 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.141 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_16_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.136 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.242 + std = 0.049 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.041 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.123 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.308 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.124 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.004 + std = 0.126 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.144 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.003 + std = 0.072 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.143 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.164 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.520 + std = 0.110 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.006 + std = 0.142 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.143 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.030 + std = 0.054 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.140 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_17_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.170 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.251 + std = 0.040 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.002 + std = 0.048 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.123 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.021 + std = 0.314 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.126 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.010 + std = 0.334 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.151 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.003 + std = 0.071 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.149 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.144 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.622 + std = 0.118 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.007 + std = 0.159 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.143 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.053 + std = 0.045 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.141 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_18_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.197 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.291 + std = 0.047 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.057 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.118 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.010 + std = 0.325 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.119 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.009 + std = 0.190 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.152 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.002 + std = 0.071 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.151 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.211 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.937 + std = 0.141 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.012 + std = 0.209 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.145 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.066 + std = 0.038 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.143 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_19_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.295 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.322 + std = 0.030 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.077 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.113 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.008 + std = 0.298 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.115 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.016 + std = 0.292 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.158 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.077 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.157 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.188 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.032 + std = 0.114 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.014 + std = 0.223 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.142 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.070 + std = 0.054 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.142 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_20_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.302 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.350 + std = 0.022 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.007 + std = 0.093 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.106 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.005 + std = 0.255 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.107 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.022 + std = 0.496 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.169 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.069 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.167 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.143 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.166 + std = 0.073 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.018 + std = 0.223 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.003 + std = 0.140 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.084 + std = 0.058 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.138 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_21_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.002 + std = 0.253 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.449 + std = 0.051 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.205 + std = 0.187 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.117 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.016 + std = 0.276 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.094 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.122 + std = 3.280 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.188 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.080 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.189 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.002 + std = 0.161 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.631 + std = 0.101 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.309 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.138 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.090 + std = 0.060 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.134 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_22_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.004 + std = 0.107 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.608 + std = 0.079 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.483 + std = 0.509 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.156 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_q_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.002 + std = 0.094 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.157 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_k_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.027 + std = 2.606 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.138 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_v_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.027 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_weight_" + shape = [1024, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.143 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_attention_modules_out_proj_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.001 + std = 0.111 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 1.776 + std = 0.135 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_final_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.045 + std = 0.573 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_weight_" + shape = [4096, 1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.002 + std = 0.133 + data = None + + +class 
Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_intermediate_dense_parameters_bias_" + shape = [4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.088 + std = 0.076 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_weight_" + shape = [1024, 4096] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.000 + std = 0.127 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layers_modules_23_modules_feed_forward_modules_output_dense_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.000 + std = 0.037 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_weight_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_weight_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = 0.260 + std = 0.036 + data = None + + +class Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_bias_: + name = "L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_bias_" + shape = [1024] + dtype = "torch.float32" + device = "cuda:0" + mean = -0.001 + std = 0.023 + data = None
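Note: the `Program_weight_tensor_meta_*` records above carry only summary statistics (shape, dtype, device, mean, std) and set `data = None`, so the actual weight values are not part of this sample. A minimal sketch of how such a record could be consumed — assuming a hypothetical `materialize` helper that this diff does not define — to build a statistically similar stand-in tensor for graph replay:

import torch

def materialize(meta):
    # Map the recorded dtype string ("torch.float32") back to a torch dtype.
    dtype = getattr(torch, meta.dtype.split(".")[-1])
    # Fall back to CPU when the recorded device ("cuda:0") is unavailable.
    device = meta.device if torch.cuda.is_available() else "cpu"
    # data is None, so draw values matching the recorded mean/std instead of
    # loading real checkpoint weights.
    return torch.empty(meta.shape, dtype=dtype, device=device).normal_(
        mean=meta.mean, std=meta.std
    )

# Example: recreate the encoder's final layer-norm weight from its record.
w = materialize(
    Program_weight_tensor_meta_L_self_modules_model_modules_encoder_modules_transformer_modules_layer_norm_parameters_weight_
)
assert list(w.shape) == [1024] and w.dtype == torch.float32

This is only an illustration of the metadata schema; the sample's real loader, if any, is not shown in this diff.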