Skip to content

Commit 109f00c

Browse files
authored
feat(core): add QMD-inspired hybrid search pipeline (#40)
* feat(core): add QMD-inspired hybrid search pipeline Implement a hybrid search system combining vector embeddings with keyword search: - Vector embeddings using node-llama-cpp with GGUF models (nomic-embed-text) - RRF (Reciprocal Rank Fusion) for combining multiple rankers - Query expansion with LLM and synonym fallback - Position-aware score blending (different weights for top3, top10, rest) - SQLite storage with sqlite-vec extension (falls back to in-memory) - Auto-download of local GGUF models (~2GB) New search module: - embeddings.ts: Vector embedding generation service - vector-store.ts: SQLite-based vector storage - rrf.ts: RRF fusion algorithm - expansion.ts: Query expansion with caching - hybrid.ts: Main hybrid search pipeline - local-models.ts: GGUF model management CLI integration: - skillkit recommend --hybrid: Enable hybrid search - skillkit recommend --expand: Enable query expansion - skillkit recommend --rerank: Enable LLM re-ranking - skillkit recommend --build-index: Build embedding index Tests: 56 new tests for search module (all passing) * fix(search): address CodeRabbit review findings - Add missing beforeEach import in embeddings.test.ts - Update optional dependencies to current versions - Fix weights/variations length mismatch in expansion.ts - Avoid mutating input arrays in rrf.ts mergeRankings - Delete orphaned chunk vectors in vector-store.ts delete() and clear() - Add comment for empty catch block in vector-store.ts - Make hybridSearch helper use buildIndex for true hybrid search - Handle graceful degradation when embeddings unavailable - Add node-llama-cpp mock in hybrid.test.ts * fix(search): address CodeRabbit review round 2 - Fix gemma GGUF URL to point to valid community repo (bartowski) - Add SQL injection protection for table names in vector-store.ts - Fix resource leak with try/finally in hybridSearch helper - Add backpressure handling for large model downloads - Clamp startLine to non-negative in chunk calculation - Add runtime 
validation warning for --expand/--rerank without --hybrid - Update node-llama-cpp API usage (createEmbeddingContext, createContext) - Update pnpm-lock.yaml for new optional dependency versions * fix(search): use @ts-ignore for optional node-llama-cpp imports CI doesn't install optional dependencies, so @ts-ignore is needed instead of @ts-expect-error to suppress module not found errors.
1 parent e3d7e0e commit 109f00c

File tree

18 files changed

+4474
-1
lines changed

18 files changed

+4474
-1
lines changed

packages/cli/src/commands/recommend.ts

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ export class RecommendCommand extends Command {
5252
['Update skill index', '$0 recommend --update'],
5353
['Search for skills by task', '$0 recommend --task "authentication"'],
5454
['Search for skills (alias)', '$0 recommend --search "testing"'],
55+
['Hybrid search (vector + keyword)', '$0 recommend --search "auth" --hybrid'],
56+
['Hybrid search with query expansion', '$0 recommend --search "auth" --hybrid --expand'],
57+
['Build hybrid search index', '$0 recommend --build-index'],
5558
],
5659
});
5760

@@ -125,6 +128,26 @@ export class RecommendCommand extends Command {
125128
description: 'Show category path for each recommendation',
126129
});
127130

131+
/** Routes `--search` / `--task` queries through the hybrid (vector + keyword) search handler. */
hybrid = Option.Boolean('--hybrid,-H', false, {
  description: 'Use hybrid search (vector + keyword)',
});

/** Passed to the engine as `enableExpansion`; `execute()` warns when set without `--hybrid`. */
expand = Option.Boolean('--expand,-x', false, {
  description: 'Enable query expansion (requires --hybrid)',
});

/** Passed to the engine as `enableReranking`; `execute()` warns when set without `--hybrid`. */
rerank = Option.Boolean('--rerank', false, {
  description: 'Enable LLM reranking (requires --hybrid)',
});

/** When set, `execute()` builds the hybrid embedding index and returns without searching. */
buildIndex = Option.Boolean('--build-index', false, {
  description: 'Build/rebuild the hybrid search embedding index',
});
150+
128151
async execute(): Promise<number> {
129152
const targetPath = resolve(this.projectPath || process.cwd());
130153

@@ -133,6 +156,16 @@ export class RecommendCommand extends Command {
133156
return await this.updateIndex();
134157
}
135158

159+
// Handle hybrid index building
160+
if (this.buildIndex) {
161+
return await this.buildHybridIndex();
162+
}
163+
164+
// Validate hybrid-dependent options
165+
if ((this.expand || this.rerank) && !this.hybrid) {
166+
warn('--expand and --rerank require --hybrid flag. These options will be ignored.');
167+
}
168+
136169
if (!this.quiet && !this.json) {
137170
header('Skill Recommendations');
138171
}
@@ -168,6 +201,9 @@ export class RecommendCommand extends Command {
168201
// Handle search mode (--search or --task)
169202
const searchQuery = this.search || this.task;
170203
if (searchQuery) {
204+
if (this.hybrid) {
205+
return await this.handleHybridSearch(engine, searchQuery);
206+
}
171207
return this.handleSearch(engine, searchQuery);
172208
}
173209

@@ -446,6 +482,132 @@ export class RecommendCommand extends Command {
446482
console.log(colors.muted('More details: skillkit recommend --explain --verbose'));
447483
}
448484

485+
/**
 * Run a hybrid (vector + keyword) search for `query` and print the results.
 *
 * Initializes the engine's hybrid pipeline, runs the search with the
 * `--expand` / `--rerank` flags plus the limit and minimum-score options, then
 * prints either JSON (`--json`) or a formatted list. Any error thrown along
 * the way is reported and the command falls back to `handleSearch`.
 *
 * @param engine - Recommendation engine to query.
 * @param query - Search text.
 * @returns 0 when the hybrid search ran (even with no matches); otherwise the
 *   exit code of the `handleSearch` fallback.
 */
private async handleHybridSearch(engine: RecommendationEngine, query: string): Promise<number> {
  const interactive = !this.quiet && !this.json;

  if (interactive) {
    header(`Hybrid Search: "${query}"`);
  }

  const s = interactive ? spinner() : null;
  s?.start('Initializing hybrid search...');

  // parseInt returns NaN for non-numeric input. A NaN or negative limit would
  // silently truncate the result list, so fall back to the default of 10.
  const requestedLimit = this.limit ? parseInt(this.limit, 10) : NaN;
  const limit = Number.isNaN(requestedLimit) || requestedLimit < 0 ? 10 : requestedLimit;

  // An unparsable minimum score is treated as "no minimum".
  const requestedMinScore = this.minScore ? parseInt(this.minScore, 10) : NaN;
  const minScore = Number.isNaN(requestedMinScore) ? undefined : requestedMinScore;

  try {
    await engine.initHybridSearch();
    s?.message('Searching...');

    const results = await engine.hybridSearch({
      query,
      limit,
      hybrid: true,
      enableExpansion: this.expand,
      enableReranking: this.rerank,
      filters: { minScore },
    });

    s?.stop(`Found ${results.length} results`);

    if (this.json) {
      console.log(JSON.stringify(results, null, 2));
      return 0;
    }

    if (results.length === 0) {
      warn(`No skills found matching "${query}"`);
      return 0;
    }

    console.log('');
    console.log(colors.bold(`Hybrid search results for "${query}" (${results.length} found):`));
    if (this.expand && results[0]?.expandedTerms?.length) {
      console.log(colors.muted(`  Expanded: ${results[0].expandedTerms.join(', ')}`));
    }
    console.log('');

    for (const result of results) {
      // Colour bands: >= 70 success, >= 50 warning, anything lower muted.
      let relevanceColor: (text: string) => string = colors.muted;
      if (result.relevance >= 70) {
        relevanceColor = colors.success;
      } else if (result.relevance >= 50) {
        relevanceColor = colors.warning;
      }
      const relevanceBar = progressBar(result.relevance, 100, 10);

      console.log(`  ${relevanceColor(`${result.relevance}%`)} ${colors.dim(relevanceBar)} ${colors.bold(result.skill.name)}`);

      if (result.skill.description) {
        console.log(`      ${colors.muted(truncate(result.skill.description, 70))}`);
      }

      if (this.verbose) {
        // Only scores the engine actually returned are shown.
        const scores: string[] = [];
        if (typeof result.vectorSimilarity === 'number') {
          scores.push(`vector: ${(result.vectorSimilarity * 100).toFixed(0)}%`);
        }
        if (typeof result.keywordScore === 'number') {
          scores.push(`keyword: ${result.keywordScore.toFixed(0)}%`);
        }
        if (typeof result.rrfScore === 'number') {
          scores.push(`rrf: ${result.rrfScore.toFixed(3)}`);
        }
        if (scores.length > 0) {
          console.log(`      ${colors.dim('Scores:')} ${scores.join(' | ')}`);
        }
      }

      if (result.matchedTerms.length > 0) {
        console.log(`      ${colors.dim('Matched:')} ${result.matchedTerms.join(', ')}`);
      }

      console.log('');
    }

    return 0;
  } catch (err) {
    s?.stop(colors.error('Hybrid search failed'));
    const reason = err instanceof Error ? err.message : String(err);
    // In --json / --quiet mode stdout must stay clean for the fallback's own
    // output, so diagnostics go to stderr instead.
    const report = interactive ? console.log : console.error;
    report(colors.muted(reason));
    report(colors.muted('Falling back to standard search...'));
    return this.handleSearch(engine, query);
  }
}
575+
576+
/**
 * Build (or rebuild) the hybrid search embedding index from the cached skill index.
 *
 * Loads the skill index via `this.loadIndex()`, feeds it to a fresh
 * `RecommendationEngine`, and reports progress through a spinner unless
 * `--quiet` is set.
 *
 * @returns 0 on success; 1 when no skill index is available or the build fails.
 */
private async buildHybridIndex(): Promise<number> {
  if (!this.quiet) {
    header('Build Hybrid Search Index');
  }

  const index = this.loadIndex();
  if (!index || index.skills.length === 0) {
    warn('No skill index found. Run --update first.');
    return 1;
  }

  // Honour --quiet for the spinner too, matching how the header is handled.
  const s = this.quiet ? null : spinner();
  s?.start('Initializing...');

  try {
    const engine = new RecommendationEngine();
    engine.loadIndex(index);

    await engine.buildHybridIndex((progress) => {
      // Guard against total === 0, which would otherwise render "NaN%".
      const percentage =
        progress.total > 0 ? Math.round((progress.current / progress.total) * 100) : 0;
      s?.message(`${progress.phase}: ${progress.message || ''} (${percentage}%)`);
    });

    s?.stop(colors.success(`${symbols.success} Built hybrid index for ${index.skills.length} skills`));
    if (!this.quiet) {
      console.log(colors.muted('  Index stored in: ~/.skillkit/search.db'));
      console.log(colors.muted('  Use --hybrid flag for vector+keyword search\n'));
    }

    return 0;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    if (s) {
      s.stop(colors.error('Failed to build hybrid index'));
      console.log(colors.muted(reason));
    } else {
      // Quiet mode has no spinner, but a failure must still be reported.
      console.error(`Failed to build hybrid index: ${reason}`);
    }
    return 1;
  }
}
610+
449611
private handleSearch(engine: RecommendationEngine, query: string): number {
450612
if (!this.quiet && !this.json) {
451613
header(`Search: "${query}"`);

packages/core/package.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
"./learning": {
3838
"import": "./dist/learning/index.js",
3939
"types": "./dist/learning/index.d.ts"
40+
},
41+
"./search": {
42+
"import": "./dist/search/index.js",
43+
"types": "./dist/search/index.d.ts"
4044
}
4145
},
4246
"files": [
@@ -53,7 +57,13 @@
5357
"yaml": "^2.6.1",
5458
"zod": "^3.24.1"
5559
},
60+
"optionalDependencies": {
61+
"node-llama-cpp": "^3.15.0",
62+
"better-sqlite3": "^12.0.0",
63+
"sqlite-vec": "^0.1.6"
64+
},
5665
"devDependencies": {
66+
"@types/better-sqlite3": "^7.6.11",
5767
"@types/node": "^22.10.5",
5868
"tsup": "^8.3.5",
5969
"typescript": "^5.7.2",

packages/core/src/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,6 @@ export * from './connectors/index.js';
101101

102102
// Execution Flow (Step Tracking & Metrics - Phase 21)
103103
export * from './execution/index.js';
104+
105+
// Hybrid Search (QMD-Inspired Vector + Keyword Search)
106+
export * from './search/index.js';

packages/core/src/recommend/engine.ts

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,51 @@ import type {
1111
SearchOptions,
1212
SearchResult,
1313
FreshnessResult,
14+
RecommendHybridSearchOptions,
15+
RecommendHybridSearchResult,
1416
} from './types.js';
1517
import { DEFAULT_SCORING_WEIGHTS, TAG_TO_TECH, getTechTags } from './types.js';
18+
import type { HybridSearchPipeline } from '../search/hybrid.js';
1619

1720
/**
1821
* Recommendation engine for matching skills to project profiles
1922
*/
2023
export class RecommendationEngine {
2124
private weights: ScoringWeights;
2225
private index: SkillIndex | null = null;
26+
private hybridPipeline: HybridSearchPipeline | null = null;
2327

2428
/**
 * @param weights - Optional overrides layered on top of `DEFAULT_SCORING_WEIGHTS`.
 */
constructor(weights?: Partial<ScoringWeights>) {
  const merged: ScoringWeights = { ...DEFAULT_SCORING_WEIGHTS, ...weights };
  this.weights = merged;
}
2731

32+
/**
 * Initialize the hybrid search pipeline for vector + keyword search.
 *
 * The search module is imported lazily. The new pipeline is stored on the
 * engine only after `initialize()` resolves, so a failed initialization never
 * leaves a half-initialized pipeline behind (and a previously working
 * pipeline, if any, stays in place).
 *
 * @throws Whatever the dynamic import or the pipeline's `initialize()` throws.
 */
async initHybridSearch(): Promise<void> {
  const { createHybridSearchPipeline } = await import('../search/hybrid.js');
  const pipeline = createHybridSearchPipeline();

  const indexAtStart = this.index;
  if (indexAtStart) {
    pipeline.loadSkillsIndex(indexAtStart);
  }

  await pipeline.initialize();

  // loadIndex() may have swapped the index while initialization was in flight;
  // it could not forward it because the new pipeline was not yet stored.
  if (this.index && this.index !== indexAtStart) {
    pipeline.loadSkillsIndex(this.index);
  }

  this.hybridPipeline = pipeline;
}
43+
44+
/**
 * Report whether a hybrid pipeline exists and says it is initialized.
 *
 * @returns `false` when no pipeline has been created; otherwise the pipeline's
 *   own `isInitialized()` result.
 */
isHybridSearchAvailable(): boolean {
  const pipeline = this.hybridPipeline;
  if (pipeline === null) {
    return false;
  }
  return pipeline.isInitialized();
}
50+
2851
/**
 * Set the skill index used by this engine.
 *
 * When a hybrid pipeline has already been created, the same index is
 * forwarded to it.
 *
 * @param index - Skill index to use from now on.
 */
loadIndex(index: SkillIndex): void {
  this.index = index;

  const pipeline = this.hybridPipeline;
  if (!pipeline) {
    return;
  }
  pipeline.loadSkillsIndex(index);
}
3460

3561
/**
@@ -603,6 +629,71 @@ export class RecommendationEngine {
603629
return { relevance, matchedTerms, snippet };
604630
}
605631

632+
/**
 * Hybrid search combining vector embeddings and keyword matching.
 *
 * Falls back to the keyword `search()` (with `semantic: true`) when `hybrid`
 * is false or when no initialized pipeline is available; in that case each
 * result's `hybridScore` is `relevance / 100`. Otherwise the query is
 * delegated to the pipeline and the `tags`, `verified` and `minScore` filters
 * are applied to what it returns.
 *
 * NOTE(review): the filters run after the pipeline has been asked for `limit`
 * results, so a filtered search can return fewer than `limit` hits even when
 * more matches exist — confirm whether the pipeline should over-fetch.
 * NOTE(review): `semanticWeight` / `keywordWeight` are accepted in the options
 * but not forwarded to the pipeline here.
 *
 * @param options - Query, limit (default 10), feature flags and filters.
 * @returns At most `limit` results.
 */
async hybridSearch(options: RecommendHybridSearchOptions): Promise<RecommendHybridSearchResult[]> {
  const { query, hybrid = true, enableExpansion = false, enableReranking = false, filters } = options;

  // A NaN or negative limit would make the final slice() drop results
  // unexpectedly, so fall back to the default instead.
  const requestedLimit = options.limit ?? 10;
  const limit = Number.isFinite(requestedLimit) && requestedLimit >= 0 ? requestedLimit : 10;

  // Degrade to keyword search unless a pipeline exists and finished
  // initializing; a non-null but uninitialized pipeline is not usable.
  const pipeline = this.hybridPipeline;
  if (!hybrid || !pipeline || !pipeline.isInitialized()) {
    return this.search({ query, limit, semantic: true, filters }).map((r) => ({
      ...r,
      hybridScore: r.relevance / 100,
    }));
  }

  const response = await pipeline.search({
    query,
    limit,
    enableExpansion,
    enableReranking,
  });

  const tagList = filters?.tags ?? [];
  const wantedTags = tagList.length > 0 ? new Set(tagList) : null;
  const verifiedOnly = Boolean(filters?.verified);
  const minScore = filters?.minScore;

  return response.results
    .map((r) => ({
      skill: r.skill,
      relevance: r.relevance,
      matchedTerms: r.matchedTerms,
      snippet: r.snippet,
      hybridScore: r.hybridScore,
      vectorSimilarity: r.vectorSimilarity,
      keywordScore: r.keywordScore,
      rrfScore: r.rrfScore,
      expandedTerms: r.expandedTerms,
    }))
    .filter((r) => {
      if (wantedTags && !(r.skill.tags?.some((t) => wantedTags.has(t)) ?? false)) {
        return false;
      }
      if (verifiedOnly && !r.skill.verified) {
        return false;
      }
      // A falsy minScore (undefined or 0) means "no minimum".
      if (minScore && !(r.relevance >= minScore)) {
        return false;
      }
      return true;
    })
    .slice(0, limit);
}
679+
680+
/**
 * Build the hybrid search index from the loaded skills.
 *
 * Initializes the hybrid pipeline first when none exists yet or when the
 * existing one reports that it is not initialized.
 *
 * @param onProgress - Optional callback handed to the pipeline's `buildIndex`.
 * @throws Error when no skill index has been loaded, or when the pipeline is
 *   still missing after initialization.
 */
async buildHybridIndex(
  onProgress?: (progress: { phase: string; current: number; total: number; message?: string }) => void
): Promise<void> {
  if (!this.index) {
    throw new Error('No skill index loaded. Call loadIndex() first.');
  }

  // Re-run initialization for a pipeline left uninitialized by an earlier
  // failed attempt, not only when the pipeline is missing.
  if (!this.hybridPipeline?.isInitialized()) {
    await this.initHybridSearch();
  }

  // Explicit guard instead of a non-null assertion.
  const pipeline = this.hybridPipeline;
  if (!pipeline) {
    throw new Error('Hybrid search pipeline failed to initialize.');
  }

  await pipeline.buildIndex(this.index.skills, onProgress);
}
696+
606697
/**
607698
* Check freshness of installed skills against project dependencies
608699
*

packages/core/src/recommend/types.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,3 +326,25 @@ export interface ReasoningRecommendationResult extends RecommendationResult {
326326
strategy: string;
327327
};
328328
}
329+
330+
/**
 * Options accepted by `RecommendationEngine.hybridSearch`, on top of the
 * regular `SearchOptions`.
 */
export interface RecommendHybridSearchOptions extends SearchOptions {
  /** When `false`, the engine uses its keyword search path instead of the hybrid pipeline. */
  hybrid?: boolean;
  /** Forwarded to the hybrid pipeline's search call as `enableExpansion`. */
  enableExpansion?: boolean;
  /** Forwarded to the hybrid pipeline's search call as `enableReranking`. */
  enableReranking?: boolean;
  /** NOTE(review): presumably the weight given to the vector score — confirm where this is consumed. */
  semanticWeight?: number;
  /** NOTE(review): presumably the weight given to the keyword score — confirm where this is consumed. */
  keywordWeight?: number;
}
340+
341+
/**
 * A `SearchResult` extended with the optional per-ranker scores returned by
 * `RecommendationEngine.hybridSearch`.
 */
export interface RecommendHybridSearchResult extends SearchResult {
  /** Combined score; the engine's keyword-only fallback sets it to `relevance / 100`. */
  hybridScore?: number;
  /** Vector similarity; presumably in the 0–1 range (the CLI multiplies it by 100 for display) — confirm. */
  vectorSimilarity?: number;
  /** Keyword score; the CLI prints it as a percentage without rescaling. */
  keywordScore?: number;
  /** Reciprocal Rank Fusion score. */
  rrfScore?: number;
  /** Terms produced by query expansion, when it ran. */
  expandedTerms?: string[];
}

0 commit comments

Comments
 (0)