@@ -2747,205 +2747,199 @@ struct llama_cparams {
27472747
27482748struct llama_layer_posnet {
27492749 // resnet
2750- struct ggml_tensor * norm1 = nullptr;
2750+ struct ggml_tensor * norm1 = nullptr;
27512751 struct ggml_tensor * norm1_b = nullptr;
27522752
2753- struct ggml_tensor * conv1 = nullptr;
2753+ struct ggml_tensor * conv1 = nullptr;
27542754 struct ggml_tensor * conv1_b = nullptr;
27552755
2756- struct ggml_tensor * norm2 = nullptr;
2756+ struct ggml_tensor * norm2 = nullptr;
27572757 struct ggml_tensor * norm2_b = nullptr;
27582758
2759- struct ggml_tensor * conv2 = nullptr;
2759+ struct ggml_tensor * conv2 = nullptr;
27602760 struct ggml_tensor * conv2_b = nullptr;
27612761
27622762 // attention
2763- struct ggml_tensor * attn_norm = nullptr;
2763+ struct ggml_tensor * attn_norm = nullptr;
27642764 struct ggml_tensor * attn_norm_b = nullptr;
27652765
2766- struct ggml_tensor * attn_q = nullptr;
2766+ struct ggml_tensor * attn_q = nullptr;
27672767 struct ggml_tensor * attn_q_b = nullptr;
27682768
2769- struct ggml_tensor * attn_k = nullptr;
2769+ struct ggml_tensor * attn_k = nullptr;
27702770 struct ggml_tensor * attn_k_b = nullptr;
27712771
2772- struct ggml_tensor * attn_v = nullptr;
2772+ struct ggml_tensor * attn_v = nullptr;
27732773 struct ggml_tensor * attn_v_b = nullptr;
27742774
2775- struct ggml_tensor * attn_o = nullptr;
2775+ struct ggml_tensor * attn_o = nullptr;
27762776 struct ggml_tensor * attn_o_b = nullptr;
27772777
27782778 // normalize
2779- struct ggml_tensor * norm = nullptr;
2779+ struct ggml_tensor * norm = nullptr;
27802780 struct ggml_tensor * norm_b = nullptr;
27812781};
27822782
27832783struct llama_layer_convnext {
2784- struct ggml_tensor * dw;
2785- struct ggml_tensor * dw_b;
2784+ struct ggml_tensor * dw = nullptr ;
2785+ struct ggml_tensor * dw_b = nullptr ;
27862786
2787- struct ggml_tensor * norm;
2788- struct ggml_tensor * norm_b;
2787+ struct ggml_tensor * norm = nullptr ;
2788+ struct ggml_tensor * norm_b = nullptr ;
27892789
2790- struct ggml_tensor * pw1;
2791- struct ggml_tensor * pw1_b;
2790+ struct ggml_tensor * pw1 = nullptr ;
2791+ struct ggml_tensor * pw1_b = nullptr ;
27922792
2793- struct ggml_tensor * pw2;
2794- struct ggml_tensor * pw2_b;
2793+ struct ggml_tensor * pw2 = nullptr ;
2794+ struct ggml_tensor * pw2_b = nullptr ;
27952795
2796- struct ggml_tensor * gamma;
2796+ struct ggml_tensor * gamma = nullptr ;
27972797};
27982798
2799- // TODO: separate into "llama_layer_enc" and "llama_layer_dec"
28002799struct llama_layer {
2801- llama_layer() {
2802- // initialize all pointers to NULL
2803- std::memset(this, 0, sizeof(*this));
2804- }
2805-
28062800 // normalization
2807- struct ggml_tensor * attn_norm;
2808- struct ggml_tensor * attn_norm_b;
2809- struct ggml_tensor * attn_norm_2;
2810- struct ggml_tensor * attn_norm_2_b;
2811- struct ggml_tensor * attn_q_norm;
2812- struct ggml_tensor * attn_q_norm_b;
2813- struct ggml_tensor * attn_k_norm;
2814- struct ggml_tensor * attn_k_norm_b;
2815- struct ggml_tensor * attn_out_norm;
2816- struct ggml_tensor * attn_out_norm_b;
2817- struct ggml_tensor * attn_q_a_norm;
2818- struct ggml_tensor * attn_kv_a_norm;
2819- struct ggml_tensor * attn_sub_norm;
2820- struct ggml_tensor * attn_post_norm;
2821- struct ggml_tensor * ffn_sub_norm;
2822- struct ggml_tensor * attn_norm_cross;
2823- struct ggml_tensor * attn_norm_enc;
2801+ struct ggml_tensor * attn_norm = nullptr ;
2802+ struct ggml_tensor * attn_norm_b = nullptr ;
2803+ struct ggml_tensor * attn_norm_2 = nullptr ;
2804+ struct ggml_tensor * attn_norm_2_b = nullptr ;
2805+ struct ggml_tensor * attn_q_norm = nullptr ;
2806+ struct ggml_tensor * attn_q_norm_b = nullptr ;
2807+ struct ggml_tensor * attn_k_norm = nullptr ;
2808+ struct ggml_tensor * attn_k_norm_b = nullptr ;
2809+ struct ggml_tensor * attn_out_norm = nullptr ;
2810+ struct ggml_tensor * attn_out_norm_b = nullptr ;
2811+ struct ggml_tensor * attn_q_a_norm = nullptr ;
2812+ struct ggml_tensor * attn_kv_a_norm = nullptr ;
2813+ struct ggml_tensor * attn_sub_norm = nullptr ;
2814+ struct ggml_tensor * attn_post_norm = nullptr ;
2815+ struct ggml_tensor * ffn_sub_norm = nullptr ;
2816+ struct ggml_tensor * attn_norm_cross = nullptr ;
2817+ struct ggml_tensor * attn_norm_enc = nullptr ;
28242818
28252819 // attention
2826- struct ggml_tensor * wq;
2827- struct ggml_tensor * wk;
2828- struct ggml_tensor * wv;
2829- struct ggml_tensor * wo;
2830- struct ggml_tensor * wqkv;
2831- struct ggml_tensor * wq_a;
2832- struct ggml_tensor * wq_b;
2833- struct ggml_tensor * wkv_a_mqa;
2834- struct ggml_tensor * wkv_b;
2835- struct ggml_tensor * wq_cross;
2836- struct ggml_tensor * wk_cross;
2837- struct ggml_tensor * wv_cross;
2838- struct ggml_tensor * wo_cross;
2839- struct ggml_tensor * wq_enc;
2840- struct ggml_tensor * wk_enc;
2841- struct ggml_tensor * wv_enc;
2842- struct ggml_tensor * wo_enc;
2820+ struct ggml_tensor * wq = nullptr ;
2821+ struct ggml_tensor * wk = nullptr ;
2822+ struct ggml_tensor * wv = nullptr ;
2823+ struct ggml_tensor * wo = nullptr ;
2824+ struct ggml_tensor * wqkv = nullptr ;
2825+ struct ggml_tensor * wq_a = nullptr ;
2826+ struct ggml_tensor * wq_b = nullptr ;
2827+ struct ggml_tensor * wkv_a_mqa = nullptr ;
2828+ struct ggml_tensor * wkv_b = nullptr ;
2829+ struct ggml_tensor * wq_cross = nullptr ;
2830+ struct ggml_tensor * wk_cross = nullptr ;
2831+ struct ggml_tensor * wv_cross = nullptr ;
2832+ struct ggml_tensor * wo_cross = nullptr ;
2833+ struct ggml_tensor * wq_enc = nullptr ;
2834+ struct ggml_tensor * wk_enc = nullptr ;
2835+ struct ggml_tensor * wv_enc = nullptr ;
2836+ struct ggml_tensor * wo_enc = nullptr ;
28432837
28442838 // attention bias
2845- struct ggml_tensor * bq;
2846- struct ggml_tensor * bk;
2847- struct ggml_tensor * bv;
2848- struct ggml_tensor * bo;
2849- struct ggml_tensor * bqkv;
2839+ struct ggml_tensor * bq = nullptr ;
2840+ struct ggml_tensor * bk = nullptr ;
2841+ struct ggml_tensor * bv = nullptr ;
2842+ struct ggml_tensor * bo = nullptr ;
2843+ struct ggml_tensor * bqkv = nullptr ;
28502844
28512845 // relative position bias
2852- struct ggml_tensor * attn_rel_b;
2853- struct ggml_tensor * attn_rel_b_enc;
2854- struct ggml_tensor * attn_rel_b_cross;
2846+ struct ggml_tensor * attn_rel_b = nullptr ;
2847+ struct ggml_tensor * attn_rel_b_enc = nullptr ;
2848+ struct ggml_tensor * attn_rel_b_cross = nullptr ;
28552849
28562850 // normalization
2857- struct ggml_tensor * ffn_norm;
2858- struct ggml_tensor * ffn_norm_b;
2859- struct ggml_tensor * ffn_post_norm;
2860- struct ggml_tensor * layer_out_norm;
2861- struct ggml_tensor * layer_out_norm_b;
2862- struct ggml_tensor * ffn_norm_exps;
2863- struct ggml_tensor * ffn_norm_enc;
2851+ struct ggml_tensor * ffn_norm = nullptr ;
2852+ struct ggml_tensor * ffn_norm_b = nullptr ;
2853+ struct ggml_tensor * ffn_post_norm = nullptr ;
2854+ struct ggml_tensor * layer_out_norm = nullptr ;
2855+ struct ggml_tensor * layer_out_norm_b = nullptr ;
2856+ struct ggml_tensor * ffn_norm_exps = nullptr ;
2857+ struct ggml_tensor * ffn_norm_enc = nullptr ;
28642858
28652859 // ff
2866- struct ggml_tensor * ffn_gate; // w1
2867- struct ggml_tensor * ffn_down; // w2
2868- struct ggml_tensor * ffn_up; // w3
2869- struct ggml_tensor * ffn_gate_enc;
2870- struct ggml_tensor * ffn_down_enc;
2871- struct ggml_tensor * ffn_up_enc;
2860+ struct ggml_tensor * ffn_gate = nullptr ; // w1
2861+ struct ggml_tensor * ffn_down = nullptr ; // w2
2862+ struct ggml_tensor * ffn_up = nullptr; // w3
2863+ struct ggml_tensor * ffn_gate_enc = nullptr ;
2864+ struct ggml_tensor * ffn_down_enc = nullptr ;
2865+ struct ggml_tensor * ffn_up_enc = nullptr ;
28722866
28732867 // ff MoE
2874- struct ggml_tensor * ffn_gate_inp;
2875- struct ggml_tensor * ffn_gate_exps;
2876- struct ggml_tensor * ffn_down_exps;
2877- struct ggml_tensor * ffn_up_exps ;
2868+ struct ggml_tensor * ffn_gate_inp = nullptr ;
2869+ struct ggml_tensor * ffn_gate_exps = nullptr ;
2870+ struct ggml_tensor * ffn_down_exps = nullptr ;
2871+ struct ggml_tensor * ffn_up_exps = nullptr ;
28782872
28792873 // ff shared expert (shexp)
2880- struct ggml_tensor * ffn_gate_inp_shexp;
2881- struct ggml_tensor * ffn_gate_shexp;
2882- struct ggml_tensor * ffn_down_shexp;
2883- struct ggml_tensor * ffn_up_shexp;
2874+ struct ggml_tensor * ffn_gate_inp_shexp = nullptr ;
2875+ struct ggml_tensor * ffn_gate_shexp = nullptr ;
2876+ struct ggml_tensor * ffn_down_shexp = nullptr ;
2877+ struct ggml_tensor * ffn_up_shexp = nullptr ;
28842878
28852879 // ff bias
2886- struct ggml_tensor * ffn_gate_b;
2887- struct ggml_tensor * ffn_down_b; // b2
2888- struct ggml_tensor * ffn_up_b; // b3
2889- struct ggml_tensor * ffn_act;
2880+ struct ggml_tensor * ffn_gate_b = nullptr ;
2881+ struct ggml_tensor * ffn_down_b = nullptr ; // b2
2882+ struct ggml_tensor * ffn_up_b = nullptr ; // b3
2883+ struct ggml_tensor * ffn_act = nullptr ;
28902884
28912885 // mamba proj
2892- struct ggml_tensor * ssm_in;
2893- struct ggml_tensor * ssm_x;
2894- struct ggml_tensor * ssm_dt;
2895- struct ggml_tensor * ssm_out;
2886+ struct ggml_tensor * ssm_in = nullptr ;
2887+ struct ggml_tensor * ssm_x = nullptr ;
2888+ struct ggml_tensor * ssm_dt = nullptr ;
2889+ struct ggml_tensor * ssm_out = nullptr ;
28962890
28972891 // mamba
2898- struct ggml_tensor * ssm_conv1d;
2899- struct ggml_tensor * ssm_a;
2900- struct ggml_tensor * ssm_d;
2892+ struct ggml_tensor * ssm_conv1d = nullptr ;
2893+ struct ggml_tensor * ssm_a = nullptr ;
2894+ struct ggml_tensor * ssm_d = nullptr ;
29012895
29022896 // mamba bias
2903- struct ggml_tensor * ssm_conv1d_b;
2904- struct ggml_tensor * ssm_dt_b;
2897+ struct ggml_tensor * ssm_conv1d_b = nullptr ;
2898+ struct ggml_tensor * ssm_dt_b = nullptr ;
29052899
29062900 // rwkv
2907- struct ggml_tensor * time_mix_w1;
2908- struct ggml_tensor * time_mix_w2;
2909- struct ggml_tensor * time_mix_lerp_x;
2910- struct ggml_tensor * time_mix_lerp_w;
2911- struct ggml_tensor * time_mix_lerp_k;
2912- struct ggml_tensor * time_mix_lerp_v;
2913- struct ggml_tensor * time_mix_lerp_r;
2914- struct ggml_tensor * time_mix_lerp_g;
2915-
2916- struct ggml_tensor * time_mix_first;
2917- struct ggml_tensor * time_mix_decay;
2918- struct ggml_tensor * time_mix_decay_w1;
2919- struct ggml_tensor * time_mix_decay_w2;
2920- struct ggml_tensor * time_mix_key;
2921- struct ggml_tensor * time_mix_value;
2922- struct ggml_tensor * time_mix_receptance;
2923- struct ggml_tensor * time_mix_gate;
2924-
2925- struct ggml_tensor * time_mix_ln;
2926- struct ggml_tensor * time_mix_ln_b;
2927- struct ggml_tensor * time_mix_output;
2928-
2929- struct ggml_tensor * channel_mix_lerp_k;
2930- struct ggml_tensor * channel_mix_lerp_r;
2931-
2932- struct ggml_tensor * channel_mix_key;
2933- struct ggml_tensor * channel_mix_receptance;
2934- struct ggml_tensor * channel_mix_value;
2901+ struct ggml_tensor * time_mix_w1 = nullptr ;
2902+ struct ggml_tensor * time_mix_w2 = nullptr ;
2903+ struct ggml_tensor * time_mix_lerp_x = nullptr ;
2904+ struct ggml_tensor * time_mix_lerp_w = nullptr ;
2905+ struct ggml_tensor * time_mix_lerp_k = nullptr ;
2906+ struct ggml_tensor * time_mix_lerp_v = nullptr ;
2907+ struct ggml_tensor * time_mix_lerp_r = nullptr ;
2908+ struct ggml_tensor * time_mix_lerp_g = nullptr ;
2909+
2910+ struct ggml_tensor * time_mix_first = nullptr ;
2911+ struct ggml_tensor * time_mix_decay = nullptr ;
2912+ struct ggml_tensor * time_mix_decay_w1 = nullptr ;
2913+ struct ggml_tensor * time_mix_decay_w2 = nullptr ;
2914+ struct ggml_tensor * time_mix_key = nullptr ;
2915+ struct ggml_tensor * time_mix_value = nullptr ;
2916+ struct ggml_tensor * time_mix_receptance = nullptr ;
2917+ struct ggml_tensor * time_mix_gate = nullptr ;
2918+
2919+ struct ggml_tensor * time_mix_ln = nullptr ;
2920+ struct ggml_tensor * time_mix_ln_b = nullptr ;
2921+ struct ggml_tensor * time_mix_output = nullptr ;
2922+
2923+ struct ggml_tensor * channel_mix_lerp_k = nullptr ;
2924+ struct ggml_tensor * channel_mix_lerp_r = nullptr ;
2925+
2926+ struct ggml_tensor * channel_mix_key = nullptr ;
2927+ struct ggml_tensor * channel_mix_receptance = nullptr ;
2928+ struct ggml_tensor * channel_mix_value = nullptr ;
29352929
29362930 // long rope factors
29372931 struct ggml_tensor * rope_long = nullptr;
29382932 struct ggml_tensor * rope_short = nullptr;
29392933 struct ggml_tensor * rope_freqs = nullptr;
29402934
29412935 // bitnet scale
2942- struct ggml_tensor * wq_scale;
2943- struct ggml_tensor * wk_scale;
2944- struct ggml_tensor * wv_scale;
2945- struct ggml_tensor * wo_scale;
2946- struct ggml_tensor * ffn_gate_scale;
2947- struct ggml_tensor * ffn_up_scale;
2948- struct ggml_tensor * ffn_down_scale;
2936+ struct ggml_tensor * wq_scale = nullptr ;
2937+ struct ggml_tensor * wk_scale = nullptr ;
2938+ struct ggml_tensor * wv_scale = nullptr ;
2939+ struct ggml_tensor * wo_scale = nullptr ;
2940+ struct ggml_tensor * ffn_gate_scale = nullptr ;
2941+ struct ggml_tensor * ffn_up_scale = nullptr ;
2942+ struct ggml_tensor * ffn_down_scale = nullptr ;
29492943
29502944 struct llama_layer_posnet posnet;
29512945
@@ -3167,6 +3161,7 @@ struct llama_sbatch {
31673161 // batch indices of the output
31683162 std::vector<size_t> out_ids;
31693163 std::vector<llama_sbatch_seq> seq;
3164+
31703165 const llama_batch * batch = nullptr;
31713166
31723167 // buffers for the ubatch
0 commit comments