feat: optimally separate result sets in query adapter (#149)

lsorber · web-flow · commit bddb36bf422a · 2025-06-02T15:58:20.000+02:00
diff --git a/src/raglite/_query_adapter.py b/src/raglite/_query_adapter.py
@@ -1,6 +1,11 @@
 """Compute and update an optimal query adapter."""
 
+# ruff: noqa: N806
+
+from dataclasses import replace
+
 import numpy as np
+from scipy.optimize import lsq_linear
 from sqlalchemy import text
 from sqlalchemy.orm.attributes import flag_modified
 from sqlmodel import Session, col, select
@@ -10,13 +15,32 @@
 from raglite._database import Chunk, ChunkEmbedding, Eval, IndexMetadata, create_database_engine
 from raglite._embed import embed_strings
 from raglite._search import vector_search
-from raglite._typing import FloatMatrix
+from raglite._typing import FloatMatrix, FloatVector
+
+
+def _optimize_query_target(
+    q: FloatVector,
+    P: FloatMatrix,  # noqa: N803,
+    N: FloatMatrix,  # noqa: N803,
+    *,
+    α: float = 0.05,  # noqa: PLC2401
+) -> FloatVector:
+    # Convert to double precision for the optimizer.
+    q_dtype = q.dtype
+    q, P, N = q.astype(np.float64), P.astype(np.float64), N.astype(np.float64)
+    # Construct the constraint matrix D := P - (1 + α) * N.  # noqa: RUF003
+    D = np.reshape(P[:, np.newaxis, :] - (1.0 + α) * N[np.newaxis, :, :], (-1, P.shape[1]))
+    # Solve the dual problem min_μ ½ ‖q + Dᵀ μ‖² s.t. μ ≥ 0.
+    A, b = D.T, -q
+    μ_star = lsq_linear(A, b, bounds=(0.0, np.inf), tol=np.finfo(A.dtype).eps).x  # noqa: PLC2401
+    # Recover the primal solution q* = q + Dᵀ μ*.
+    q_star: FloatVector = (q + D.T @ μ_star).astype(q_dtype)
+    return q_star
 
 
-def update_query_adapter(  # noqa: C901, PLR0915
+def update_query_adapter(  # noqa: PLR0915
     *,
-    max_triplets: int = 4096,
-    max_triplets_per_eval: int = 64,
+    max_evals: int = 4096,
     optimize_top_k: int = 40,
     optimize_gap: float = 0.05,
     config: RAGLiteConfig | None = None,
@@ -28,57 +52,75 @@ def update_query_adapter(  # noqa: C901, PLR0915
     order to improve the quality of the search results.
 
     Given a set of triplets (qᵢ, pᵢ, nᵢ), we want to find the query adapter A that increases the
-    score pᵢ'qᵢ of the positive chunk pᵢ and decreases the score nᵢ'qᵢ of the negative chunk nᵢ.
+    score pᵢᵀqᵢ of the positive chunk pᵢ and decreases the score nᵢᵀqᵢ of the negative chunk nᵢ.
 
     If the nearest neighbour search uses the dot product as its relevance score, we can find the
     optimal query adapter by solving the following relaxed Procrustes optimisation problem with a
     bound on the Frobenius norm of A:
 
-    A* = argmax Σᵢ pᵢ' (A qᵢ) - nᵢ' (A qᵢ)
-                Σᵢ (pᵢ - nᵢ)' A qᵢ
-                trace[ (P - N) A Q' ]  where  Q := [q₁'; ...; qₖ']
-                                              P := [p₁'; ...; pₖ']
-                                              N := [n₁'; ...; nₖ']
-                trace[ Q' (P - N) A ]
-                trace[ M A ]           where  M := Q' (P - N)
-           s.t. ||A||_F == 1
-       = M' / ||M||_F
+    A* := argmax Σᵢ pᵢᵀ (A qᵢ) - nᵢᵀ (A qᵢ)
+                 Σᵢ (pᵢ - nᵢ)ᵀ A qᵢ
+                 trace[ (P - N) A Qᵀ ]  where  Q := [q₁ᵀ; ...; qₖᵀ]
+                                               P := [p₁ᵀ; ...; pₖᵀ]
+                                               N := [n₁ᵀ; ...; nₖᵀ]
+                 trace[ Qᵀ (P - N) A ]
+                 trace[ Mᵀ A ]          where  M := (P - N)ᵀ Q
+            s.t. ||A||_F == 1
+        = M / ||M||_F
 
     If the nearest neighbour search uses the cosine similarity as its relevance score, we can find
     the optimal query adapter by solving the following orthogonal Procrustes optimisation problem
-    with an orthogonality constraint on A:
-
-    A* = argmax Σᵢ pᵢ' (A qᵢ) - nᵢ' (A qᵢ)
-                Σᵢ (pᵢ - nᵢ)' A qᵢ
-                trace[ (P - N) A Q' ]
-                trace[ Q' (P - N) A ]
-                trace[ M A ]
-                trace[ U Σ V' A ]      where  U Σ V' := M is the SVD of M
-                trace[ Σ V' A U ]
-           s.t. A'A == 𝕀
-       = V U'
-
-    Additionally, we want to limit the effect of A* so that it adjusts q just enough to invert
-    incorrectly ordered (q, p, n) triplets, but not so much as to affect the correctly ordered ones.
-    To achieve this, we'll rewrite M as α(M / s) + (1 - α)𝕀, where s scales M to the same norm as 𝕀,
-    and choose the smallest α that ranks (q, p, n) correctly. If α = 0, the relevance score gap
-    between an incorrect (p, n) pair would be B := (p - n)' q < 0. If α = 1, the relevance score gap
-    would be A := (p - n)' (p - n) / ||p - n|| > 0. For a target relevance score gap of say
-    C := 5% * A, the optimal α is then given by αA + (1 - α)B = C => α = (B - C) / (B - A).
+    [1] with an orthogonality constraint on A:
+
+    A* := argmax Σᵢ pᵢᵀ (A qᵢ) - nᵢᵀ (A qᵢ)
+                 Σᵢ (pᵢ - nᵢ)ᵀ A qᵢ
+                 trace[ (P - N) A Qᵀ ]
+                 trace[ Qᵀ (P - N) A ]
+                 trace[ Mᵀ A ]
+                 trace[ (U Σ Vᵀ)ᵀ A ]     where  U Σ Vᵀ := M is the SVD of M
+                 trace[ Σ Uᵀ A V ]
+            s.t. AᵀA == 𝕀
+        = U Vᵀ
+
+    The action of A* is to map a query embedding qᵢ to a target vector t := (pᵢ - nᵢ) that maximally
+    separates the positive and negative chunks. For a given query embedding qᵢ, a retrieval method
+    will yield a result set containing both positive and negative chunks. Instead of extracting
+    multiple triplets (qᵢ, pᵢ, nᵢ) from each such result set, we can compute a single optimal target
+    vector t* for the query embedding qᵢ as follows:
+
+    t* := argmin ½ ||t - qᵢ||²
+            s.t. Dᵢ t >= 0
+
+    where the constraint matrix Dᵢ := [pₘᵀ - (1 + α) nₙᵀ]ₘₙ comprises all pairs of positive and
+    negative chunk embeddings in the result set corresponding to the query embedding qᵢ. This
+    optimisation problem expresses the idea that the target vector t* should be as close as
+    possible to the query embedding qᵢ, while separating all positive and negative chunk embeddings
+    in the result set by a margin of at least α. To solve this problem, we'll first introduce
+    a Lagrangian with Lagrange multipliers μ:
+
+    L(t, μ) := ½ ||t - qᵢ||² + μᵀ (-Dᵢ t)
+
+    Now we can set the gradient of the Lagrangian to zero to find the optimal target vector t*:
+
+    ∇ₜL = t - qᵢ - Dᵢᵀ μ = 0
+    t* = qᵢ + Dᵢᵀ μ*
+
+    where μ* is the solution to the dual nonnegative least squares problem
+
+    μ* := argmin ½ ||qᵢ + Dᵢᵀ μ||²
+            s.t. μ >= 0
 
     Parameters
     ----------
-    max_triplets
-        The maximum number of (q, p, n) triplets to compute. Each triplet corresponds to a rank-one
-        update of the query adapter A.
-    max_triplets_per_eval
-        The maximum number of (q, p, n) triplets a single eval may contribute to the query adapter.
+    max_evals
+        The maximum number of evals to use to compute the query adapter. Each eval corresponds to a
+        rank-one update of the query adapter A.
     optimize_top_k
-        The number of search results per eval to extract (q, p, n) triplets from.
+        The number of search results per eval to optimize.
     optimize_gap
-        The strength of the query adapter, expressed as a fraction between 0 and 1 of the maximum
-        relevance score gap. Should be large enough to correct incorrectly ranked results, but small
-        enough to not affect correctly ranked results.
+        The strength of the query adapter, expressed as a nonnegative number. Should be large enough
+        to correct incorrectly ranked results, but small enough to not affect correctly ranked
+        results.
     config
         The RAGLite config to use to construct and store the query adapter.
 
@@ -87,7 +129,7 @@ def update_query_adapter(  # noqa: C901, PLR0915
     ValueError
         If no documents have been inserted into the database yet.
     ValueError
-        If there aren't enough evals to compute the query adapter yet.
+        If no evals have been inserted into the database yet.
     ValueError
         If the `config.vector_search_distance_metric` is not supported.
 
@@ -97,98 +139,69 @@ def update_query_adapter(  # noqa: C901, PLR0915
         The query adapter.
     """
     config = config or RAGLiteConfig()
-    config_no_query_adapter = RAGLiteConfig(
-        **{**config.__dict__, "vector_search_query_adapter": False}
-    )
+    config_no_query_adapter = replace(config, vector_search_query_adapter=False)
     engine = create_database_engine(config)
     with Session(engine) as session:
         # Get random evals from the database.
         chunk_embedding = session.exec(select(ChunkEmbedding).limit(1)).first()
         if chunk_embedding is None:
             error_message = "First run `insert_document()` to insert documents."
             raise ValueError(error_message)
-        evals = session.exec(select(Eval).order_by(Eval.id).limit(max_triplets)).all()
-        # Exit if there aren't enough evals to compute the query adapter.
-        embedding_dim = len(chunk_embedding.embedding)
-        required_evals = np.ceil(embedding_dim / max_triplets_per_eval) - len(evals)
-        if required_evals > 0:
-            error_message = f"First run `insert_evals()` to generate {required_evals} more evals."
+        evals = session.exec(select(Eval).order_by(Eval.id).limit(max_evals)).all()
+        if len(evals) == 0:
+            error_message = "First run `insert_evals()` to generate evals."
             raise ValueError(error_message)
-        # Loop over the evals to generate (q, p, n) triplets.
-        Q = np.zeros((0, embedding_dim))  # noqa: N806
-        P = np.zeros_like(Q)  # noqa: N806
-        N = np.zeros_like(Q)  # noqa: N806
-        for eval_ in tqdm(
-            evals, desc="Extracting triplets from evals", unit="eval", dynamic_ncols=True
-        ):
+        # Construct the query and target matrices.
+        Q = np.zeros((0, len(chunk_embedding.embedding)))
+        T = np.zeros_like(Q)
+        for eval_ in tqdm(evals, desc="Optimizing evals", unit="eval", dynamic_ncols=True):
             # Embed the question.
-            question_embedding = embed_strings([eval_.question], config=config)
+            q = embed_strings([eval_.question], config=config)[0]
             # Retrieve chunks that would be used to answer the question.
             chunk_ids, _ = vector_search(
-                question_embedding[0], num_results=optimize_top_k, config=config_no_query_adapter
+                q, num_results=optimize_top_k, config=config_no_query_adapter
             )
             retrieved_chunks = session.exec(select(Chunk).where(col(Chunk.id).in_(chunk_ids))).all()
             retrieved_chunks = sorted(retrieved_chunks, key=lambda chunk: chunk_ids.index(chunk.id))
-            # Extract (q, p, n) triplets from the eval.
-            num_triplets = 0
-            for i, retrieved_chunk in enumerate(retrieved_chunks):
-                # Only loop over irrelevant chunks.
-                if retrieved_chunk.id not in eval_.chunk_ids:
-                    continue
-                irrelevant_chunk = retrieved_chunk
-                # Grab the negative chunk embedding of this irrelevant chunk.
-                n_top = irrelevant_chunk.embedding_matrix[
-                    [np.argmax(irrelevant_chunk.embedding_matrix @ question_embedding.T)]
+            # Skip this eval if it doesn't contain both relevant and irrelevant chunks.
+            is_relevant = np.array([chunk.id in eval_.chunk_ids for chunk in retrieved_chunks])
+            if not np.any(is_relevant) or not np.any(~is_relevant):
+                continue
+            # Extract the positive and negative chunk embeddings.
+            P = np.vstack(
+                [
+                    chunk.embedding_matrix[[np.argmax(chunk.embedding_matrix @ q)]]
+                    for chunk in np.array(retrieved_chunks)[is_relevant]
                 ]
-                # Grab the positive chunk embeddings that are ranked lower than the negative one.
-                p_top = [
-                    chunk.embedding_matrix[
-                        [np.argmax(chunk.embedding_matrix @ question_embedding.T)]
-                    ]
-                    for chunk in retrieved_chunks[i + 1 :]  # Chunks that are ranked lower.
-                    if chunk is not None and chunk.id in eval_.chunk_ids
+            )
+            N = np.vstack(
+                [
+                    chunk.embedding_matrix[[np.argmax(chunk.embedding_matrix @ q)]]
+                    for chunk in np.array(retrieved_chunks)[~is_relevant]
                 ]
-                # Ensure that we only have (q, p, n) triplets for which p is ranked lower than n.
-                p_top = [p for p in p_top if (n_top - p) @ question_embedding.T > 0]
-                if not p_top:
-                    continue
-                # Stack the (q, p, n) triplets.
-                p = np.vstack(p_top)
-                n = np.repeat(n_top, p.shape[0], axis=0)
-                q = np.repeat(question_embedding, p.shape[0], axis=0)
-                num_triplets += p.shape[0]
-                # Append the (q, p, n) triplets to the Q, P, N matrices.
-                Q = np.vstack([Q, q])  # noqa: N806
-                P = np.vstack([P, p])  # noqa: N806
-                N = np.vstack([N, n])  # noqa: N806
-                # Stop if we have enough triplets for this eval.
-                if num_triplets >= max_triplets_per_eval:
-                    break
-            # Stop if we have enough triplets to compute the query adapter.
-            if Q.shape[0] > max_triplets:
-                Q, P, N = Q[:max_triplets, :], P[:max_triplets, :], N[:max_triplets, :]  # noqa: N806
-                break
-        # Normalise the rows of Q, P, N.
-        Q /= np.linalg.norm(Q, axis=1, keepdims=True)  # noqa: N806
-        P /= np.linalg.norm(P, axis=1, keepdims=True)  # noqa: N806
-        N /= np.linalg.norm(N, axis=1, keepdims=True)  # noqa: N806
-        # Compute the optimal weighted query adapter A*.
-        # TODO: Matmul in float16 is extremely slow compared to single or double precision, why?
-        gap_before = np.sum((P - N) * Q, axis=1)
-        gap_after = 2 * (1 - np.sum(P * N, axis=1)) / np.linalg.norm(P - N, axis=1)
-        gap_target = optimize_gap * gap_after
-        α = (gap_before - gap_target) / (gap_before - gap_after)  # noqa: PLC2401
-        MT = (α[:, np.newaxis] * (P - N)).T @ Q  # noqa: N806
-        s = np.linalg.norm(MT, ord="fro") / np.sqrt(MT.shape[0])
-        MT = np.mean(α) * (MT / s) + np.mean(1 - α) * np.eye(Q.shape[1])  # noqa: N806
-        A_star: FloatMatrix  # noqa: N806
+            )
+            # Compute the optimal target vector t for this query embedding q.
+            t = _optimize_query_target(q, P, N, α=optimize_gap)
+            Q = np.vstack([Q, q[np.newaxis, :]])
+            T = np.vstack([T, t[np.newaxis, :]])
+        # Normalise the rows of Q and T.
+        Q /= np.linalg.norm(Q, axis=1, keepdims=True)
+        if config.vector_search_distance_metric == "cosine":
+            T /= np.linalg.norm(T, axis=1, keepdims=True)
+        # Compute the optimal unconstrained query adapter M.
+        n, d = Q.shape
+        M = (1 / n) * T.T @ Q
+        if n < d or np.linalg.matrix_rank(Q) < d:
+            M += np.eye(d) - Q.T @ np.linalg.pinv(Q @ Q.T) @ Q
+        # Compute the optimal constrained query adapter A* from M, given the distance metric.
+        A_star: FloatMatrix
         if config.vector_search_distance_metric == "dot":
             # Use the relaxed Procrustes solution.
-            A_star = MT / np.linalg.norm(MT, ord="fro")  # noqa: N806
+            A_star = M / np.linalg.norm(M, ord="fro") * np.sqrt(d)
         elif config.vector_search_distance_metric == "cosine":
             # Use the orthogonal Procrustes solution.
-            U, _, VT = np.linalg.svd(MT, full_matrices=False)  # noqa: N806
-            A_star = U @ VT  # noqa: N806
+            U, _, VT = np.linalg.svd(M, full_matrices=False)
+            A_star = U @ VT
         else:
             error_message = f"Unsupported metric: {config.vector_search_distance_metric}"
             raise ValueError(error_message)
@@ -200,4 +213,6 @@ def update_query_adapter(  # noqa: C901, PLR0915
         session.commit()
         if engine.dialect.name == "duckdb":
             session.execute(text("CHECKPOINT;"))
+        # Clear the index metadata cache to allow the new query adapter to be used.
+        IndexMetadata._get.cache_clear()  # noqa: SLF001
     return A_star
diff --git a/tests/test_query_adapter.py b/tests/test_query_adapter.py
@@ -0,0 +1,40 @@
+"""Test RAGLite's query adapter."""
+
+from dataclasses import replace
+
+import numpy as np
+import pytest
+
+from raglite import RAGLiteConfig, insert_evals, update_query_adapter, vector_search
+from raglite._database import IndexMetadata
+
+
+@pytest.mark.slow
+def test_query_adapter(raglite_test_config: RAGLiteConfig) -> None:
+    """Test the query adapter update functionality."""
+    # Create a config with and without the query adapter enabled.
+    config_with_query_adapter = replace(raglite_test_config, vector_search_query_adapter=True)
+    config_without_query_adapter = replace(raglite_test_config, vector_search_query_adapter=False)
+    # Verify that there is no query adapter in the database.
+    Q = IndexMetadata.get("default", config=config_without_query_adapter).get("query_adapter")  # noqa: N806
+    assert Q is None
+    # Insert evals.
+    insert_evals(num_evals=2, max_contexts_per_eval=10, config=config_with_query_adapter)
+    # Update the query adapter.
+    A = update_query_adapter(config=config_with_query_adapter)  # noqa: N806
+    assert isinstance(A, np.ndarray)
+    assert A.ndim == 2  # noqa: PLR2004
+    assert A.shape[0] == A.shape[1]
+    assert np.isfinite(A).all()
+    # Verify that there is a query adapter in the database.
+    Q = IndexMetadata.get("default", config=config_without_query_adapter).get("query_adapter")  # noqa: N806
+    assert isinstance(Q, np.ndarray)
+    assert Q.ndim == 2  # noqa: PLR2004
+    assert Q.shape[0] == Q.shape[1]
+    assert np.isfinite(Q).all()
+    assert np.all(A == Q)
+    # Verify that the query adapter affects the results of vector search.
+    query = "How does Einstein define 'simultaneous events' in his special relativity paper?"
+    _, scores_qa = vector_search(query, config=config_with_query_adapter)
+    _, scores_no_qa = vector_search(query, config=config_without_query_adapter)
+    assert scores_qa != scores_no_qa