fix: rerank detection

thxCode · thxCode · commit ff0593e3d534 · 2026-01-06T19:59:12.000+08:00
Signed-off-by: thxCode <thxcode0824@gmail.com>
diff --git a/file_architecture.go b/file_architecture.go
@@ -117,6 +117,8 @@ type (
 		RoPEScalingOriginalContextLength uint64 `json:"ropeScalingOriginalContextLength,omitempty"`
 		// RoPEScalingFinetuned is true if the RoPE scaling is fine-tuned.
 		RoPEScalingFinetuned bool `json:"ropeScalingFinetuned,omitempty"`
+		// PoolingType is the type of pooling used in the model.
+		PoolingType uint32 `json:"poolingType,omitempty"`
 		// SSMConvolutionKernel is the size of the convolution kernel used in the Selective State Space Model (SSM) and similar architectures.
 		SSMConvolutionKernel uint32 `json:"ssmConvolutionKernel,omitempty"`
 		// SSMInnerSize is the embedding size of the state in SSM and similar architectures.
@@ -857,6 +859,8 @@ func (gf *GGUFFile) transformerArchitecture(arch string) (ga GGUFArchitecture) {
 		ropeScalingOriginalContextKey = arch + ".rope.scaling.original_context_length" // uint32 maybe
 		ropeScalingFinetunedKey       = arch + ".rope.scaling.finetuned"
 
+		poolingTypeKey = arch + ".pooling_type"
+
 		ssmConvolutionKernelKey = arch + ".ssm.conv_kernel"
 		ssmInnerSizeKey         = arch + ".ssm.inner_size"
 		ssmStateSizeKey         = arch + ".ssm.state_size"
@@ -910,6 +914,7 @@ func (gf *GGUFFile) transformerArchitecture(arch string) (ga GGUFArchitecture) {
 		ropeScalingFactorKey,
 		ropeScalingOriginalContextKey,
 		ropeScalingFinetunedKey,
+		poolingTypeKey,
 		ssmConvolutionKernelKey,
 		ssmInnerSizeKey,
 		ssmStateSizeKey,
@@ -1098,6 +1103,13 @@ func (gf *GGUFFile) transformerArchitecture(arch string) (ga GGUFArchitecture) {
 		ga.RoPEScalingFinetuned = v.ValueBool()
 	}
 
+	if v, ok := m[poolingTypeKey]; ok {
+		ga.PoolingType = v.ValueUint32()
+		if ga.AttentionCausal && ga.PoolingType > 2 {
+			ga.AttentionCausal = false
+		}
+	}
+
 	if v, ok := m[ssmConvolutionKernelKey]; ok {
 		ga.SSMConvolutionKernel = ValueNumeric[uint32](v)
 	}
diff --git a/file_estimate__llamacpp.go b/file_estimate__llamacpp.go
@@ -373,6 +373,9 @@ func (gf *GGUFFile) estimateLLaMACppRunInModel(o *_GGUFRunEstimateOptions, a *GG
 		if _, found := gf.TensorInfos.Index([]string{"cls.bias", "cls.weight"}); found > 0 {
 			e.Reranking = true
 		}
+		if !e.Reranking && a.PoolingType == 4 { // 0: None, 1: Mean, 2: Cls, 3: Last, 4: Rank
+			e.Reranking = true
+		}
 	}
 
 	// Distributable,

Original file line number	Diff line number	Diff line change
`@@ -373,6 +373,9 @@ func (gf *GGUFFile) estimateLLaMACppRunInModel(o *_GGUFRunEstimateOptions, a *GG`
`373`	`373`	`if _, found := gf.TensorInfos.Index([]string{"cls.bias", "cls.weight"}); found > 0 {`
`374`	`374`	`e.Reranking = true`
`375`	`375`	`}`
	`376`	`+ if !e.Reranking && a.PoolingType == 4 { // 0: None, 1: Mean, 2: Cls, 3: Last, 4: Rank`
	`377`	`+ e.Reranking = true`
	`378`	`+ }`
`376`	`379`	`}`
`377`	`380`
`378`	`381`	`// Distributable,`