Skip to content

Commit e9e615c

Browse files
committed
feat(reranking): LLM-as-judge reranker + rerankChain multi-stage pipeline
1 parent ff28f48 commit e9e615c

File tree

4 files changed

+316
-0
lines changed

4 files changed

+316
-0
lines changed

src/rag/reranking/IRerankerService.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,13 @@ export interface RerankerServiceConfig {
173173
/** Enable debug logging */
174174
debug?: boolean;
175175
}
176+
177+
/**
 * A single stage in a reranker chain pipeline.
 *
 * Stages are executed in order by `RerankerService.rerankChain`; each stage
 * reranks the previous stage's output and narrows it to at most `topK`
 * results before the next stage runs. Stages whose provider is not
 * registered or not available are skipped.
 */
export interface RerankChainStage {
  /** Provider ID (e.g., 'local', 'cohere', 'llm-judge'). */
  provider: string;
  /** Max results to keep after this stage. */
  topK: number;
  /** Optional model override for this stage. */
  model?: string;
}

src/rag/reranking/RerankerService.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import type {
1313
RerankerOutput,
1414
RerankerRequestConfig,
1515
RerankerServiceConfig,
16+
RerankChainStage,
1617
RerankerProviderConfig,
1718
} from './IRerankerService';
1819

@@ -249,4 +250,61 @@ export class RerankerService {
249250
};
250251
});
251252
}
253+
254+
/**
255+
* Run chunks through a multi-stage reranker pipeline.
256+
* Each stage's output feeds into the next, narrowing the result set.
257+
* Unavailable providers are silently skipped.
258+
*
259+
* @param query - The search query.
260+
* @param chunks - Input chunks to rerank.
261+
* @param chain - Ordered array of reranking stages.
262+
* @returns Reranked chunks after all stages.
263+
*/
264+
public async rerankChain(
265+
query: string,
266+
chunks: RagRetrievedChunk[],
267+
chain: RerankChainStage[],
268+
): Promise<RagRetrievedChunk[]> {
269+
if (chunks.length === 0 || chain.length === 0) return chunks;
270+
271+
let current = chunks;
272+
const stagesRun: string[] = [];
273+
274+
for (const stage of chain) {
275+
const provider = this.providers.get(stage.provider);
276+
if (!provider) {
277+
this.logger?.debug?.(`rerankChain: Provider '${stage.provider}' not registered — skipping stage`);
278+
continue;
279+
}
280+
281+
try {
282+
const available = await provider.isAvailable();
283+
if (!available) {
284+
this.logger?.debug?.(`rerankChain: Provider '${stage.provider}' not available — skipping stage`);
285+
continue;
286+
}
287+
288+
current = await this.rerankChunks(query, current, {
289+
providerId: stage.provider,
290+
modelId: stage.model ?? '',
291+
topN: stage.topK,
292+
});
293+
stagesRun.push(stage.provider);
294+
} catch (err) {
295+
this.logger?.warn?.(
296+
`rerankChain: Stage '${stage.provider}' failed — continuing with previous results`,
297+
err instanceof Error ? err.message : String(err),
298+
);
299+
}
300+
}
301+
302+
return current.map((chunk) => ({
303+
...chunk,
304+
metadata: {
305+
...chunk.metadata,
306+
_rerankerChainStages: stagesRun.join(','),
307+
},
308+
}));
309+
}
252310
}
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
/**
2+
* @fileoverview LLM-as-Judge Reranker — two-phase hybrid reranking using LLM calls.
3+
*
4+
* Phase 1: Batch pointwise scoring with a cheap model (gpt-4o-mini, haiku).
5+
* Groups documents into batches of 10, asks LLM to score 0-10.
6+
* Phase 2: Listwise final ranking with a synthesis model.
7+
* Takes top-K from phase 1, asks LLM to rank by relevance.
8+
*
9+
* Cognitive science: Combines absolute judgment (pointwise) with comparative
10+
* judgment (listwise) — mirrors how human expert reviewers evaluate documents.
11+
*
12+
* References:
13+
* - Sun, W., et al. (2023). "Is ChatGPT Good at Search? Investigating Large
14+
* Language Models as Re-Ranking Agents." arXiv:2304.09542
15+
* - Qin, Z., et al. (2023). "Large Language Models are Effective Text Rankers
16+
* with Pairwise Ranking Prompting." arXiv:2306.17563
17+
*
18+
* @module agentos/rag/reranking/providers/LlmJudgeReranker
19+
*/
20+
21+
import type {
22+
IRerankerProvider,
23+
RerankerInput,
24+
RerankerOutput,
25+
RerankerRequestConfig,
26+
RerankedDocument,
27+
} from '../IRerankerService.js';
28+
29+
/**
 * Configuration for the LLM judge reranker.
 *
 * The reranker makes no network calls of its own; all model access goes
 * through the injected `llmCallFn`, keeping the provider transport-agnostic
 * and easy to stub in tests.
 */
export interface LlmJudgeRerankerConfig {
  /** LLM call function: (systemPrompt, userPrompt, model?) → response text. */
  llmCallFn: (system: string, user: string, model?: string) => Promise<string>;
  /** Model for batch pointwise scoring (cheap). Auto-detected if not set. */
  scoringModel?: string;
  /** Model for listwise final ranking (better). Agent's primary if not set. */
  rankingModel?: string;
  /** Max documents to process in phase 1 (default 100); extras are dropped. */
  maxPointwiseDocuments?: number;
  /** How many survive phase 1 into phase 2 (default 20). */
  pointwiseTopK?: number;
  /** Timeout per LLM call in ms. */
  timeoutMs?: number;
  /** Batch size for pointwise scoring (default 10). */
  batchSize?: number;
}
46+
47+
// Phase 1 system prompt: absolute (pointwise) 0-10 scoring of each document
// independently; the reply must be a bare JSON array aligned with input order.
const POINTWISE_SYSTEM = `You are a relevance scorer. Rate each document's relevance to the query on a scale of 0-10. 10 = perfectly relevant, 0 = completely irrelevant. Return ONLY a JSON array of integer scores, one per document, in the same order. Example: [8, 3, 7, 2, 9]`;

// Phase 2 system prompt: comparative (listwise) ranking of the surviving
// candidates, referenced by their document IDs.
const LISTWISE_SYSTEM = `You are a relevance ranker. Rank the documents by relevance to the query, most relevant first. Return ONLY a JSON array of document IDs in ranked order. Example: ["doc-3", "doc-1", "doc-5"]`;
50+
51+
/** Two-phase LLM-based reranker: batch pointwise + listwise top-K. */
52+
export class LlmJudgeReranker implements IRerankerProvider {
53+
public readonly providerId = 'llm-judge' as const;
54+
55+
private readonly llmCallFn: LlmJudgeRerankerConfig['llmCallFn'];
56+
private readonly scoringModel?: string;
57+
private readonly rankingModel?: string;
58+
private readonly maxPointwiseDocuments: number;
59+
private readonly pointwiseTopK: number;
60+
private readonly batchSize: number;
61+
62+
constructor(config: LlmJudgeRerankerConfig) {
63+
this.llmCallFn = config.llmCallFn;
64+
this.scoringModel = config.scoringModel;
65+
this.rankingModel = config.rankingModel;
66+
this.maxPointwiseDocuments = config.maxPointwiseDocuments ?? 100;
67+
this.pointwiseTopK = config.pointwiseTopK ?? 20;
68+
this.batchSize = config.batchSize ?? 10;
69+
}
70+
71+
async isAvailable(): Promise<boolean> {
72+
return typeof this.llmCallFn === 'function';
73+
}
74+
75+
async rerank(input: RerankerInput, config: RerankerRequestConfig): Promise<RerankerOutput> {
76+
const topN = config.topN ?? this.pointwiseTopK;
77+
let documents = input.documents;
78+
79+
if (documents.length > this.maxPointwiseDocuments) {
80+
documents = documents.slice(0, this.maxPointwiseDocuments);
81+
}
82+
83+
// Phase 1: Batch pointwise scoring
84+
const scored = await this.batchPointwiseScore(input.query, documents);
85+
86+
// Sort by score descending, take top-K for phase 2
87+
scored.sort((a, b) => b.score - a.score);
88+
const candidates = scored.slice(0, this.pointwiseTopK);
89+
90+
// Phase 2: Listwise ranking
91+
let finalRanking: RerankedDocument[];
92+
try {
93+
finalRanking = await this.listwiseRank(input.query, candidates, topN);
94+
} catch {
95+
// Fallback: use pointwise scores
96+
finalRanking = candidates.slice(0, topN).map((c, i) => ({
97+
id: c.id,
98+
content: c.content,
99+
relevanceScore: 1 - (i / Math.max(topN, 1)),
100+
originalScore: c.originalScore,
101+
metadata: c.metadata,
102+
}));
103+
}
104+
105+
return { results: finalRanking };
106+
}
107+
108+
/** Phase 1: Score documents in batches. */
109+
private async batchPointwiseScore(
110+
query: string,
111+
documents: RerankerInput['documents'],
112+
): Promise<Array<RerankerInput['documents'][number] & { score: number }>> {
113+
const batches: RerankerInput['documents'][] = [];
114+
for (let i = 0; i < documents.length; i += this.batchSize) {
115+
batches.push(documents.slice(i, i + this.batchSize));
116+
}
117+
118+
const results: Array<RerankerInput['documents'][number] & { score: number }> = [];
119+
120+
for (const batch of batches) {
121+
const docList = batch
122+
.map((d, i) => `[${i + 1}] ${d.content.slice(0, 200)}`)
123+
.join('\n');
124+
const userPrompt = `Query: "${query}"\n\nDocuments:\n${docList}`;
125+
126+
try {
127+
const raw = await this.llmCallFn(POINTWISE_SYSTEM, userPrompt, this.scoringModel);
128+
const cleaned = raw.replace(/```json?\n?/g, '').replace(/```/g, '').trim();
129+
const scores = JSON.parse(cleaned) as number[];
130+
131+
for (let i = 0; i < batch.length; i++) {
132+
results.push({
133+
...batch[i],
134+
score: typeof scores[i] === 'number' ? scores[i] : 0,
135+
});
136+
}
137+
} catch {
138+
for (const doc of batch) {
139+
results.push({ ...doc, score: 0 });
140+
}
141+
}
142+
}
143+
144+
return results;
145+
}
146+
147+
/** Phase 2: Listwise ranking of top candidates. */
148+
private async listwiseRank(
149+
query: string,
150+
candidates: Array<RerankerInput['documents'][number] & { score: number }>,
151+
topN: number,
152+
): Promise<RerankedDocument[]> {
153+
const docList = candidates
154+
.map((d) => `[${d.id}] ${d.content.slice(0, 200)}`)
155+
.join('\n');
156+
const userPrompt = `Query: "${query}"\n\nDocuments:\n${docList}`;
157+
158+
const raw = await this.llmCallFn(LISTWISE_SYSTEM, userPrompt, this.rankingModel);
159+
const cleaned = raw.replace(/```json?\n?/g, '').replace(/```/g, '').trim();
160+
const ranking = JSON.parse(cleaned) as string[];
161+
162+
const candidateMap = new Map(candidates.map((c) => [c.id, c]));
163+
const results: RerankedDocument[] = [];
164+
165+
for (let i = 0; i < Math.min(ranking.length, topN); i++) {
166+
const doc = candidateMap.get(ranking[i]);
167+
if (!doc) continue;
168+
results.push({
169+
id: doc.id,
170+
content: doc.content,
171+
relevanceScore: 1 - (i / Math.max(ranking.length, 1)),
172+
originalScore: doc.originalScore,
173+
metadata: doc.metadata,
174+
});
175+
}
176+
177+
return results;
178+
}
179+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import { describe, it, expect, vi } from 'vitest';
2+
import { LlmJudgeReranker } from '../LlmJudgeReranker.js';
3+
import type { RerankerInput, RerankerRequestConfig } from '../../IRerankerService.js';
4+
5+
function makeInput(n: number): RerankerInput {
6+
return {
7+
query: 'best programming languages',
8+
documents: Array.from({ length: n }, (_, i) => ({
9+
id: `doc-${i}`,
10+
content: `Document ${i} about programming language ${['Rust', 'Python', 'Go', 'Java', 'TypeScript', 'C++', 'Kotlin', 'Swift', 'Ruby', 'Elixir'][i % 10]}`,
11+
})),
12+
};
13+
}
14+
15+
// Shared request config for the suite: routes to the 'llm-judge' provider
// and caps final results at 5 unless a test spreads in its own topN.
const defaultConfig: RerankerRequestConfig = {
  providerId: 'llm-judge',
  modelId: 'gpt-4o-mini',
  topN: 5,
};
20+
21+
describe('LlmJudgeReranker', () => {
  it('returns providerId "llm-judge"', () => {
    const reranker = new LlmJudgeReranker({ llmCallFn: vi.fn() });
    expect(reranker.providerId).toBe('llm-judge');
  });

  it('is available when llmCallFn is provided', async () => {
    const reranker = new LlmJudgeReranker({ llmCallFn: vi.fn() });
    expect(await reranker.isAvailable()).toBe(true);
  });

  it('scores documents via batch pointwise then listwise', async () => {
    // Mock call order matters: call 1 = pointwise scores for the single
    // batch of 10 docs; call 2 = listwise ranking of the top-5 by id.
    const llmCallFn = vi.fn()
      .mockResolvedValueOnce('[8, 3, 7, 2, 9, 4, 6, 1, 5, 10]')
      .mockResolvedValueOnce('["doc-9", "doc-4", "doc-0", "doc-2", "doc-6"]');

    const reranker = new LlmJudgeReranker({ llmCallFn, pointwiseTopK: 5 });

    const result = await reranker.rerank(makeInput(10), defaultConfig);
    expect(result.results.length).toBe(5);
    // Listwise order wins: doc-9 is first because the ranking reply put it first.
    expect(result.results[0].id).toBe('doc-9');
    // Rank position maps to strictly decreasing relevance scores.
    expect(result.results[0].relevanceScore).toBeGreaterThan(result.results[1].relevanceScore);
    expect(llmCallFn).toHaveBeenCalledTimes(2);
  });

  it('falls back to pointwise scores when listwise fails', async () => {
    // Pointwise succeeds; the second (listwise) call rejects, so the
    // provider must fall back to phase-1 score order (doc-4 scored 9).
    const llmCallFn = vi.fn()
      .mockResolvedValueOnce('[8, 3, 7, 2, 9]')
      .mockRejectedValueOnce(new Error('LLM timeout'));

    const reranker = new LlmJudgeReranker({ llmCallFn, pointwiseTopK: 3 });

    const result = await reranker.rerank(makeInput(5), { ...defaultConfig, topN: 3 });
    expect(result.results.length).toBe(3);
    expect(result.results[0].id).toBe('doc-4');
  });

  it('handles batch errors gracefully', async () => {
    // 20 docs with the default batch size of 10 → two pointwise calls.
    // The first batch fails (those docs score 0) but reranking still
    // completes using the second batch plus the listwise call.
    const llmCallFn = vi.fn()
      .mockRejectedValueOnce(new Error('batch 1 failed'))
      .mockResolvedValueOnce('[5, 8, 3, 7, 2, 9, 4, 6, 1, 10]')
      .mockResolvedValueOnce('["doc-19", "doc-15", "doc-11"]');

    const reranker = new LlmJudgeReranker({ llmCallFn, pointwiseTopK: 3 });

    const result = await reranker.rerank(makeInput(20), { ...defaultConfig, topN: 3 });
    expect(result.results.length).toBe(3);
  });
});

0 commit comments

Comments
 (0)