Skip to content

Commit 19813d0

Browse files
authored
Merge pull request #13 from supermemoryai/12-18-add_contextualtext_for_embedding
add contextualText for embedding
2 parents c37bf0f + 041bcfa commit 19813d0

File tree

803 files changed

+326
-125257
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

803 files changed

+326
-125257
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,3 +20,5 @@ yarn-error.log*
2020
.turbo
2121
todo.md
2222
plan.md
23+
eval/cache
24+
eval/results

eval/chunkers/ast.ts

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,18 @@
22
* AST-aware chunker wrapper for evaluation
33
*
44
* Wraps the astchunk library for use in the evaluation harness.
5+
* Uses the built-in contextualizedText for better embedding quality.
56
*/
67

78
import { chunk } from '../../src'
89

910
/**
1011
* Chunk a file using AST-aware chunking and return results
1112
* in a format compatible with the evaluation harness
13+
*
14+
* @param filepath - Path to the file
15+
* @param code - Source code content
16+
* @param maxNws - Maximum NWS characters per chunk (default: 1500)
1217
*/
1318
export async function chunkFile(
1419
filepath: string,
@@ -28,7 +33,7 @@ export async function chunkFile(
2833

2934
return chunks.map((c) => ({
3035
id: `${filepath}:${c.lineRange.start}-${c.lineRange.end}`,
31-
text: c.text,
36+
text: c.contextualizedText,
3237
startLine: c.lineRange.start,
3338
endLine: c.lineRange.end,
3439
}))

eval/data/repoeval/datasets/api_level_completion_1k_context_codegen.test.jsonl

Lines changed: 0 additions & 1600 deletions
This file was deleted.

eval/data/repoeval/datasets/api_level_completion_2k_context_codegen.test.jsonl

Lines changed: 0 additions & 1600 deletions
This file was deleted.

eval/data/repoeval/datasets/api_level_completion_2k_context_codex.test.jsonl

Lines changed: 0 additions & 1600 deletions
This file was deleted.

eval/data/repoeval/datasets/api_level_completion_4k_context_codex.test.jsonl

Lines changed: 0 additions & 1600 deletions
This file was deleted.

eval/data/repoeval/datasets/function_level_completion_2k_context_codex.test.jsonl

Lines changed: 0 additions & 455 deletions
This file was deleted.

eval/data/repoeval/datasets/function_level_completion_4k_context_codex.test.jsonl

Lines changed: 0 additions & 455 deletions
This file was deleted.

eval/data/repoeval/datasets/line_level_completion_1k_context_codegen.test.jsonl

Lines changed: 0 additions & 1600 deletions
This file was deleted.

eval/data/repoeval/datasets/line_level_completion_2k_context_codegen.test.jsonl

Lines changed: 0 additions & 1600 deletions
This file was deleted.

0 commit comments

Comments
 (0)