Skip to content

Commit 0884422

Browse files
committed
refactor(scoring): improve context relevancy scoring
Remove the MIN_RESULTS constant and use MULTI_FILE_THRESHOLD consistently for minimum-results filtering. Improve scoring by adding the base score to symbol matches, and simplify the spatial distance calculation. Add a score bonus for current-file context to improve the relevancy of local results.

These changes make context selection more accurate by:
- Using consistent thresholds
- Handling symbol-matching scores better
- Prioritizing local context appropriately
1 parent 5014b41 commit 0884422

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

lua/CopilotChat/context.lua

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,15 @@ local OFF_SIDE_RULE_LANGUAGES = {
6969
local MIN_SYMBOL_SIMILARITY = 0.3
7070
local MIN_SEMANTIC_SIMILARITY = 0.4
7171
local MULTI_FILE_THRESHOLD = 5
72-
local MIN_RESULTS = 3
7372
local MAX_FILES = 2500
7473

7574
--- Compute the cosine similarity between two vectors
7675
---@param a table<number>
7776
---@param b table<number>
78-
---@param def number
7977
---@return number
80-
local function spatial_distance_cosine(a, b, def)
78+
local function spatial_distance_cosine(a, b)
8179
if not a or not b then
82-
return def or 0
80+
return 0
8381
end
8482

8583
local dot_product = 0
@@ -103,8 +101,9 @@ end
103101
local function data_ranked_by_relatedness(query, data, min_similarity)
104102
local results = {}
105103
for _, item in ipairs(data) do
106-
local similarity = spatial_distance_cosine(item.embedding, query.embedding, item.score)
107-
table.insert(results, vim.tbl_extend('force', item, { score = similarity }))
104+
local score = spatial_distance_cosine(item.embedding, query.embedding)
105+
score = score or item.score or 0
106+
table.insert(results, vim.tbl_extend('force', item, { score = score }))
108107
end
109108

110109
table.sort(results, function(a, b)
@@ -114,7 +113,7 @@ local function data_ranked_by_relatedness(query, data, min_similarity)
114113
-- Take top MAX_RESULTS items that meet threshold, or at least MULTI_FILE_THRESHOLD items
115114
local filtered = {}
116115
for i, result in ipairs(results) do
117-
if (result.score >= min_similarity) or (i <= MIN_RESULTS) then
116+
if (result.score >= min_similarity) or (i <= MULTI_FILE_THRESHOLD) then
118117
table.insert(filtered, result)
119118
end
120119
end
@@ -175,7 +174,6 @@ local function data_ranked_by_symbols(query, data, min_similarity)
175174
local max_score = 0
176175

177176
for _, entry in ipairs(data) do
178-
local score = entry.score or 0
179177
local basename = vim.fn.fnamemodify(entry.filename, ':t'):gsub('%..*$', '')
180178

181179
-- Get trigrams for basename and compound version
@@ -187,7 +185,7 @@ local function data_ranked_by_symbols(query, data, min_similarity)
187185
local compound_sim = trigram_similarity(query_trigrams, compound_trigrams)
188186

189187
-- Take best match
190-
score = math.max(name_sim, compound_sim)
188+
local score = (entry.score or 0) + math.max(name_sim, compound_sim)
191189

192190
-- Add symbol matches
193191
if entry.symbols then
@@ -221,7 +219,7 @@ local function data_ranked_by_symbols(query, data, min_similarity)
221219
-- Filter results while preserving top scores
222220
local filtered_results = {}
223221
for i, result in ipairs(results) do
224-
if (result.score >= min_similarity) or (i <= MIN_RESULTS) then
222+
if (result.score >= min_similarity) or (i <= MULTI_FILE_THRESHOLD) then
225223
table.insert(filtered_results, result)
226224
end
227225
end
@@ -408,6 +406,7 @@ function M.files(winnr, with_content)
408406
content = table.concat(chunk, '\n'),
409407
filename = chunk_name,
410408
filetype = 'text',
409+
score = 0.2, -- Score bonus
411410
})
412411
end
413412

0 commit comments

Comments
 (0)