Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/python/txtai/embeddings/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .base import Search
from .errors import *
from .explain import Explain
from .hybrid import Hybrid
from .ids import Ids
from .query import Query
from .scan import Scan
Expand Down
25 changes: 4 additions & 21 deletions src/python/txtai/embeddings/search/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging

from .errors import IndexNotFoundError
from .hybrid import Hybrid
from .scan import Scan

# Logging configuration
Expand Down Expand Up @@ -113,27 +114,9 @@ def search(self, queries, limit, weights, index):
if isinstance(weights, (int, float)):
weights = [weights, 1 - weights]

# Create weighted scores
results = []
for vectors in zip(dense, sparse):
uids = {}
for v, scores in enumerate(vectors):
for r, (uid, score) in enumerate(scores if weights[v] > 0 else []):
# Initialize score
if uid not in uids:
uids[uid] = 0.0

# Create hybrid score
# - Convex Combination when sparse scores are normalized
# - Reciprocal Rank Fusion (RRF) when sparse scores aren't normalized
if self.scoring.isnormalized():
uids[uid] += score * weights[v]
else:
uids[uid] += (1.0 / (r + 1)) * weights[v]

results.append(sorted(uids.items(), key=lambda x: x[1], reverse=True)[:limit])

return results
# Create weighted scores via hybrid fusion strategy
fusion = Hybrid(self.scoring)
return [fusion(vectors, weights, limit) for vectors in zip(dense, sparse)]

# Raise an error when no indexes are available
if not sparse and not dense:
Expand Down
199 changes: 199 additions & 0 deletions src/python/txtai/embeddings/search/hybrid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
"""
Hybrid module
"""

import math


# Numerical clamp for log-odds computation
_EPSILON = 1e-10


class Hybrid:
"""
Hybrid score fusion strategies for combining dense and sparse search results.

Selects a fusion method based on the sparse scoring configuration:
- Log-odds conjunction for Bayesian (BB25) normalized scores
- Convex combination for default normalized scores
- Reciprocal Rank Fusion (RRF) for unnormalized scores
"""

def __init__(self, scoring):
"""
Creates a new Hybrid instance.

Args:
scoring: sparse scoring instance
"""

if scoring.isbayes():
self.method = self.logodds
elif scoring.isnormalized():
self.method = self.convex
else:
self.method = self.rrf

def __call__(self, vectors, weights, limit):
"""
Fuses dense and sparse result vectors into a single ranked list.

Args:
vectors: tuple of (dense_results, sparse_results)
weights: [dense_weight, sparse_weight]
limit: maximum results

Returns:
sorted list of (uid, score)
"""

return self.method(vectors, weights, limit)

def calibrate(self, dense_raw):
"""
Computes per-query calibration parameters for dense cosine scores.

Uses the same approach as BB25: beta=median, alpha_eff=1/std so the
logit for a dense score is alpha * (score - median), centering the
median candidate at logit 0.

Args:
dense_raw: list of raw dense cosine scores

Returns:
(median, alpha) calibration parameters
"""

d_median, d_alpha = 0.0, 1.0

dense_arr = [s for s in dense_raw if s > 0]
if dense_arr:
d_median = sorted(dense_arr)[len(dense_arr) // 2]
d_std = (sum((x - sum(dense_arr) / len(dense_arr)) ** 2 for x in dense_arr) / len(dense_arr)) ** 0.5
d_alpha = 1.0 / d_std if d_std > 0 else 1.0

return d_median, d_alpha

def logodds(self, vectors, weights, limit):
"""
Log-odds conjunction fusion for Bayesian (BB25) normalized scores.

Implements the framework from "From Bayesian Inference to Neural Computation"
(Jeong, 2026) with asymmetric dynamic calibration:

1. Calibrate dense cosine scores via per-query dynamic sigmoid
(beta=median, alpha_eff=1/std) to produce logits centered at 0.
2. Convert sparse BB25 probabilities to logits.
3. Fuse via weighted mean log-odds with confidence scaling.

Scores are returned as raw logits (not mapped back through sigmoid) to
preserve ranking resolution among top candidates.

Args:
vectors: tuple of (dense_results, sparse_results)
weights: [dense_weight, sparse_weight]
limit: maximum results

Returns:
sorted list of (uid, score) where score is a fused logit
"""

# Phase 1: Collect raw scores per document
uids = {}
dense_raw = []
for v, scores in enumerate(vectors):
for uid, score in scores if weights[v] > 0 else []:
if uid not in uids:
uids[uid] = [None, None]

if v == 0:
uids[uid][0] = score
dense_raw.append(score)
else:
# Sparse BB25 score: already a calibrated probability
uids[uid][1] = score

# Phase 2: Compute per-query calibration parameters for dense cosine scores.
# Same approach as BB25: beta=median, alpha_eff=1/std. The logit for a dense
# score is alpha * (score - median), centering the median candidate at logit 0.
d_median, d_alpha = self.calibrate(dense_raw)

# Phase 3: Fuse via weighted mean log-odds with confidence scaling.
# Raw logit scores are used for ranking instead of sigmoid(logit) to
# preserve fine-grained ordering among top candidates.
fused = {}
n = 2
alpha = 0.5
scale = n**alpha

for uid, pair in uids.items():
raw_dense = pair[0]
p_sparse = pair[1]

if raw_dense is not None and p_sparse is not None:
# Calibrate dense score via dynamic sigmoid
logit_d = d_alpha * (raw_dense - d_median)
logit_d = max(min(logit_d, 500), -500)

# Sparse BB25 score -> logit
p_s = min(max(p_sparse, _EPSILON), 1.0 - _EPSILON)
logit_s = math.log(p_s / (1.0 - p_s))

# Weighted mean log-odds with confidence scaling (Paper 2, Def 4.2.1)
l_bar = weights[0] * logit_d + weights[1] * logit_s
fused[uid] = l_bar * scale
elif raw_dense is not None:
# Only dense signal: calibrated logit scaled by weight
logit_d = d_alpha * (raw_dense - d_median)
logit_d = max(min(logit_d, 500), -500)
fused[uid] = logit_d * weights[0]
else:
# Only sparse signal: logit scaled by weight
p_s = min(max(p_sparse, _EPSILON), 1.0 - _EPSILON)
fused[uid] = math.log(p_s / (1.0 - p_s)) * weights[1]

return sorted(fused.items(), key=lambda x: x[1], reverse=True)[:limit]

def convex(self, vectors, weights, limit):
"""
Convex combination fusion for default normalized scores.

Args:
vectors: tuple of (dense_results, sparse_results)
weights: [dense_weight, sparse_weight]
limit: maximum results

Returns:
sorted list of (uid, score)
"""

uids = {}
for v, scores in enumerate(vectors):
for uid, score in scores if weights[v] > 0 else []:
if uid not in uids:
uids[uid] = 0.0
uids[uid] += score * weights[v]

return sorted(uids.items(), key=lambda x: x[1], reverse=True)[:limit]

def rrf(self, vectors, weights, limit):
"""
Reciprocal Rank Fusion for unnormalized scores.

Args:
vectors: tuple of (dense_results, sparse_results)
weights: [dense_weight, sparse_weight]
limit: maximum results

Returns:
sorted list of (uid, score)
"""

uids = {}
for v, scores in enumerate(vectors):
for r, (uid, _) in enumerate(scores if weights[v] > 0 else []):
if uid not in uids:
uids[uid] = 0.0
uids[uid] += (1.0 / (r + 1)) * weights[v]

return sorted(uids.items(), key=lambda x: x[1], reverse=True)[:limit]
10 changes: 10 additions & 0 deletions src/python/txtai/scoring/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,13 @@ def isnormalized(self):
"""

raise NotImplementedError

def isbayes(self):
    """
    Indicates whether Bayesian (BB25) normalization is in effect.

    Returns:
        True when BB25/Bayesian normalization is active, False otherwise
    """

    # Default for the base scoring contract; subclasses with BB25 support override this
    return False
17 changes: 14 additions & 3 deletions src/python/txtai/scoring/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@ def __init__(self, config):
self.beta = self.config.get("beta")
self.beta = float(self.beta) if self.beta is not None else self.beta

# BB25-compatible aliases for Bayesian normalization mode.
BAYESIAN_METHODS = ("bayes", "bayesian", "bayesian-bm25", "bb25")

def isbayes(self):
    """
    Determines whether the configured normalization method is a Bayesian alias.

    Returns:
        True if using BB25/Bayesian normalization
    """

    # Compare against each recognized BB25 alias
    return any(self.method == name for name in self.BAYESIAN_METHODS)

def __call__(self, scores, avgscore):
"""
Normalizes scores.
Expand All @@ -45,9 +58,7 @@ def __call__(self, scores, avgscore):
normalized scores
"""

# BB25-compatible aliases for Bayesian normalization mode.
bayesian = ("bayes", "bayesian", "bayesian-bm25", "bb25")
return self.bayes(scores) if self.method in bayesian else self.default(scores, avgscore)
return self.bayes(scores) if self.isbayes() else self.default(scores, avgscore)

def default(self, scores, avgscore):
"""
Expand Down
3 changes: 3 additions & 0 deletions src/python/txtai/scoring/tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ def issparse(self):
def isnormalized(self):
    """
    Checks if this scoring instance normalizes scores.

    Returns:
        the normalize setting (truthy when score normalization is enabled)
    """

    # NOTE(review): presumably a boolean config flag set at construction - confirm
    return self.normalize

def isbayes(self):
    """
    Checks whether the underlying normalizer applies Bayesian (BB25) normalization.

    Returns:
        True if a normalizer is present and reports Bayesian mode, False otherwise
    """

    normalizer = self.normalizer
    if normalizer is None:
        # No normalizer configured - Bayesian mode impossible
        return False

    return normalizer.isbayes()

def computefreq(self, tokens):
"""
Computes token frequency. Used for token weighting.
Expand Down