package convert

import (
	"slices"
	"strings"

	"github.com/ollama/ollama/fs/ggml"
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"
)

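// qwen3Model maps the fields of a Qwen3 (or Qwen3-MoE) config.json that the
// converter consumes; the json tags name the upstream keys.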
type qwen3Model struct {
	ModelParameters
	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
	HiddenSize            uint32  `json:"hidden_size"`
	HiddenLayers          uint32  `json:"num_hidden_layers"`
	IntermediateSize      uint32  `json:"intermediate_size"`
	NumAttentionHeads     uint32  `json:"num_attention_heads"`
	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
	HeadDim               uint32  `json:"head_dim"`
	NumExperts            uint32  `json:"num_experts"`
	NumExpertsPerToken    uint32  `json:"num_experts_per_tok"`
	NormTopkProb          bool    `json:"norm_topk_prob"`
	RopeTheta             float32 `json:"rope_theta"`
	RopeScaling           struct {
		Type                          string     `json:"type"`
		Factor                        ropeFactor `json:"factor"`
		OriginalMaxPositionEmbeddings uint32     `json:"original_max_position_embeddings"`
		MropeSection                  []int32    `json:"mrope_section"`
	} `json:"rope_scaling"`
	RMSNormEPS float32 `json:"rms_norm_eps"`
}

// KV implements ModelConverter.
func (q *qwen3Model) KV(t *Tokenizer) ggml.KV {
	arch := "qwen3"
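	// A checkpoint with num_experts > 0 is a mixture-of-experts model and is
	// written out under the "qwen3moe" architecture instead.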
	if q.NumExperts > 0 {
		arch += "moe"
	}

	kv := q.ModelParameters.KV(t)
	kv["general.architecture"] = arch
	kv["block_count"] = q.HiddenLayers
	kv["context_length"] = q.MaxPositionEmbeddings
	kv["embedding_length"] = q.HiddenSize
	kv["feed_forward_length"] = q.IntermediateSize
	kv["attention.head_count"] = q.NumAttentionHeads
	kv["attention.head_count_kv"] = q.NumKeyValueHeads
	kv["attention.key_length"] = q.HeadDim
	kv["attention.value_length"] = q.HeadDim

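	// Expert metadata only applies to the MoE variant.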
	if q.NumExperts > 0 {
		kv["expert_count"] = q.NumExperts
		kv["expert_used_count"] = q.NumExpertsPerToken
		kv["norm_top_k_prob"] = q.NormTopkProb
	}

	kv["rope.freq_base"] = q.RopeTheta
	kv["attention.layer_norm_rms_epsilon"] = q.RMSNormEPS

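	// Map the optional rope_scaling block from config.json: "yarn" carries a
	// scaling factor, while "mrope"/"default" carry per-section rotary dims.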
	switch q.RopeScaling.Type {
	case "":
		// no scaling
	case "yarn":
		kv["rope.scaling.type"] = q.RopeScaling.Type
		kv["rope.scaling.factor"] = q.RopeScaling.Factor
	case "mrope", "default":
		kv["rope.mrope_section"] = q.RopeScaling.MropeSection
	default:
		panic("unknown rope scaling type")
	}
	return kv
}

// Tensors implements ModelConverter.
func (q *qwen3Model) Tensors(ts []Tensor) []*ggml.Tensor {
	var out []*ggml.Tensor

	// TODO: handle split experts

	for _, t := range ts {
		switch {
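		// The checkpoint fuses each expert's gate and up projections into one
		// tensor; split it into separate gate and up tensors, transposing each
		// so the expert dimension stays in front.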
		case strings.Contains(t.Name(), "ffn_gate_up_exps"):
			afterFunc := func(t tensor.Tensor) (tensor.Tensor, error) { return tensor.Transpose(t, 0, 2, 1) }
			for t := range splitDim(t, 2,
				split{Replacer: strings.NewReplacer("gate_up", "gate"), afterFunc: afterFunc},
				split{Replacer: strings.NewReplacer("gate_up", "up"), afterFunc: afterFunc},
			) {
				t.Shape[1], t.Shape[2] = t.Shape[2], t.Shape[1]
				out = append(out, t)
			}
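		// The expert down projection is stored with its last two dimensions
		// swapped relative to what the runtime expects; record the swapped
		// shape and repack the data to match when the tensor is written.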
		case strings.Contains(t.Name(), "ffn_down_exps"):
			shape := slices.Clone(t.Shape())
			shape[1], shape[2] = shape[2], shape[1]
			t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
				dims := make([]int, len(shape))
				for i := range shape {
					dims[i] = int(shape[i])
				}

				var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
				tt, err := tensor.Transpose(tt, 0, 2, 1)
				if err != nil {
					return nil, err
				}

				// flatten tensor so it can be written as a vector
				if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
					return nil, err
				}

				return native.VectorF32(tt.(*tensor.Dense))
			})
			out = append(out, &ggml.Tensor{
				Name:     t.Name(),
				Kind:     t.Kind(),
				Shape:    shape,
				WriterTo: t,
			})
		default:
			out = append(out, &ggml.Tensor{
				Name:     t.Name(),
				Kind:     t.Kind(),
				Shape:    t.Shape(),
				WriterTo: t,
			})
		}
	}

	return out
}

// Replacements implements ModelConverter, returning old/new pairs that map
// Hugging Face tensor names onto their GGUF equivalents.
func (q *qwen3Model) Replacements() []string {
	return []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.k_proj", "attn_k",
		"self_attn.k_norm", "attn_k_norm",
		"self_attn.v_proj", "attn_v",
		"self_attn.q_proj", "attn_q",
		"self_attn.q_norm", "attn_q_norm",
		"self_attn.o_proj", "attn_output",
		"mlp.down_proj", "ffn_down",
		"mlp.gate_proj", "ffn_gate",
		"mlp.up_proj", "ffn_up",
		"mlp.gate.weight", "ffn_gate_inp.weight",
		"mlp.experts.down_proj", "ffn_down_exps.weight",
		"mlp.experts.gate_up_proj", "ffn_gate_up_exps.weight",
		"post_attention_layernorm", "ffn_norm",
		"model.norm", "output_norm",
	}
}

var _ ModelConverter = (*qwen3Model)(nil)
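
// Note: the converter itself is selected elsewhere in the package by matching
// the checkpoint's "architectures" field (e.g. "Qwen3ForCausalLM" or
// "Qwen3MoeForCausalLM"); that dispatch is assumed here, not shown.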