4
4
*/
5
5
6
6
import * as vscode from 'vscode'
7
- import * as codewhispererClient from '../../client/codewhisperer'
8
7
import * as fs from 'fs-extra'
9
8
import { DependencyGraph } from '../dependencyGraph/dependencyGraph'
10
- import { BMDocument , performBM25Scoring } from './rankBm25'
11
- import { getRelevantFilesFromEditor , isRelevant } from './editorFilesUtil'
9
+ import { BM25Document , BM25Okapi } from './rankBm25'
10
+ import { isRelevant } from './editorFilesUtil'
12
11
import { ToolkitError } from '../../../shared/errors'
13
- import { supplemetalContextFetchingTimeoutMsg } from '../../models/constants'
12
+ import { crossFileContextConfig , supplemetalContextFetchingTimeoutMsg } from '../../models/constants'
14
13
import { CancellationError } from '../../../shared/utilities/timeoutUtils'
14
+ import { CodeWhispererSupplementalContextItem } from './supplementalContextUtil'
15
15
16
16
const crossFileLanguageConfigs = [ 'java' ]
17
17
/**
 * A slice of a source file used as a candidate for cross-file supplemental context.
 */
interface Chunk {
    /** Path of the file this chunk was read from; emitted as `filePath` on the supplemental context item. */
    fileName: string
    /** Text of the chunk itself; this is what gets scored against the input chunk. */
    content: string
    /**
     * Text emitted as the supplemental context `content` when this chunk is selected.
     * NOTE(review): presumably the content of the following chunk, as set up by `linkChunks` — confirm.
     */
    nextContent: string
    /** BM25 score of the chunk; only set on chunks returned from the ranking step. */
    score?: number
}
22
- const chunkSize = 10
23
- const chunkCount = 60
24
- const topK = 3
25
23
26
24
export async function fetchSupplementalContextForSrc (
27
25
editor : vscode . TextEditor ,
28
26
dependencyGraph : DependencyGraph ,
29
27
cancellationToken : vscode . CancellationToken
30
- ) {
28
+ ) : Promise < CodeWhispererSupplementalContextItem [ ] | undefined > {
31
29
if ( crossFileLanguageConfigs . includes ( editor . document . languageId ) === false ) {
32
30
return undefined
33
31
}
@@ -38,59 +36,66 @@ export async function fetchSupplementalContextForSrc(
38
36
// Step 2: Split files to chunks with upper bound on chunkCount
39
37
// We restrict the total number of chunks to improve on latency.
40
38
// Chunk linking is required as we want to pass the next chunk value for matched chunk.
41
- const chunkList : Chunk [ ] = [ ]
39
+ let chunkList : Chunk [ ] = [ ]
42
40
for ( const relevantFile of relevantCrossFilePaths ) {
43
41
throwIfCancelled ( cancellationToken )
44
-
45
- const chunks : Chunk [ ] = splitFileToChunks ( relevantFile , chunkSize )
42
+ const chunks : Chunk [ ] = splitFileToChunks ( relevantFile , crossFileContextConfig . numberOfLinesEachChunk )
46
43
const linkedChunks = linkChunks ( chunks )
47
44
chunkList . push ( ...linkedChunks )
48
- if ( chunkList . length >= chunkCount ) {
45
+ if ( chunkList . length >= crossFileContextConfig . numberOfChunkToFetch ) {
49
46
break
50
47
}
51
48
}
52
49
50
+ // it's required since chunkList.push(...) is likely giving us a list of size > 60
51
+ chunkList = chunkList . slice ( 0 , crossFileContextConfig . numberOfChunkToFetch )
52
+
53
53
// Step 3: Generate Input chunk (10 lines left of cursor position)
54
54
// and Find Best K chunks w.r.t input chunk using BM25
55
- const inputChunk : Chunk = getInputChunk ( editor , chunkSize )
56
- const bestChunks : Chunk [ ] = findBestKChunkMatches ( inputChunk , chunkList , topK )
55
+ const inputChunk : Chunk = getInputChunk ( editor , crossFileContextConfig . numberOfLinesEachChunk )
56
+ const bestChunks : Chunk [ ] = findBestKChunkMatches ( inputChunk , chunkList , crossFileContextConfig . topK )
57
57
throwIfCancelled ( cancellationToken )
58
58
59
59
// Step 4: Transform best chunks to supplemental contexts
60
- const supplementalContexts : codewhispererClient . SupplementalContext [ ] = [ ]
60
+ const supplementalContexts : CodeWhispererSupplementalContextItem [ ] = [ ]
61
61
for ( const chunk of bestChunks ) {
62
62
throwIfCancelled ( cancellationToken )
63
63
64
- const context = {
64
+ supplementalContexts . push ( {
65
65
filePath : chunk . fileName ,
66
66
content : chunk . nextContent ,
67
- } as codewhispererClient . SupplementalContext
68
- supplementalContexts . push ( context )
67
+ score : chunk . score ,
68
+ } )
69
69
}
70
70
71
71
return supplementalContexts
72
72
}
73
73
74
/**
 * Scores every reference chunk against the input chunk with BM25 and returns the best matches.
 *
 * @param chunkInput chunk whose `content` is used as the BM25 query
 * @param chunkReferences candidate chunks; their `content` values form the BM25 corpus
 * @param k maximum number of chunks to return
 * @returns copies of the best-matching reference chunks, each carrying the BM25 `score` it received
 */
function findBestKChunkMatches(chunkInput: Chunk, chunkReferences: Chunk[], k: number): Chunk[] {
    // Nothing to rank: avoid building a BM25 model over an empty corpus.
    if (chunkReferences.length === 0) {
        return []
    }

    const chunkContentList = chunkReferences.map(chunk => chunk.content)

    // Honour the caller-supplied `k` instead of re-reading the global config,
    // so the parameter actually controls how many matches come back.
    const topDocuments: BM25Document[] = new BM25Okapi(chunkContentList).topN(chunkInput.content, k)

    return topDocuments.map(doc => {
        // The ranked documents are not in corpus order, so use `doc.index`
        // to get back to the chunk that holds the original metadata.
        const chunkReference = chunkReferences[doc.index]
        return {
            content: chunkReference.content,
            fileName: chunkReference.fileName,
            nextContent: chunkReference.nextContent,
            score: doc.score,
        }
    })
}
87
92
88
93
/* This extracts the `chunkSize` lines preceding the cursor from the trigger file.
89
94
* This will be the input query for BM25 matching against the list of cross-file chunks.
90
95
*/
91
96
function getInputChunk ( editor : vscode . TextEditor , chunkSize : number ) {
92
97
const cursorPosition = editor . selection . active
93
- const startLine = Math . max ( cursorPosition . line - 10 , 0 )
98
+ const startLine = Math . max ( cursorPosition . line - chunkSize , 0 )
94
99
const endLine = Math . max ( cursorPosition . line - 1 , 0 )
95
100
const inputChunkContent = editor . document . getText (
96
101
new vscode . Range ( startLine , 0 , endLine , editor . document . lineAt ( endLine ) . text . length )
@@ -109,7 +114,7 @@ function linkChunks(chunks: Chunk[]) {
109
114
110
115
// This additional chunk is needed to create a next pointer to chunk 0.
111
116
const firstChunk = chunks [ 0 ]
112
- const firstChunkSubContent = firstChunk . content . split ( '\n' ) . slice ( 0 , 3 ) . join ( '\n' )
117
+ const firstChunkSubContent = firstChunk . content . split ( '\n' ) . slice ( 0 , 3 ) . join ( '\n' ) . trimEnd ( )
113
118
const newFirstChunk = {
114
119
fileName : firstChunk . fileName ,
115
120
content : firstChunkSubContent ,
@@ -132,12 +137,12 @@ function linkChunks(chunks: Chunk[]) {
132
137
function splitFileToChunks ( filePath : string , chunkSize : number ) : Chunk [ ] {
133
138
const chunks : Chunk [ ] = [ ]
134
139
135
- const fileContent = fs . readFileSync ( filePath , 'utf-8' )
140
+ const fileContent = fs . readFileSync ( filePath , 'utf-8' ) . trimEnd ( )
136
141
const lines = fileContent . split ( '\n' )
137
142
138
143
for ( let i = 0 ; i < lines . length ; i += chunkSize ) {
139
144
const chunkContent = lines . slice ( i , Math . min ( i + chunkSize , lines . length ) ) . join ( '\n' )
140
- const chunk = { fileName : filePath , content : chunkContent , nextContent : '' }
145
+ const chunk = { fileName : filePath , content : chunkContent . trimEnd ( ) , nextContent : '' }
141
146
chunks . push ( chunk )
142
147
}
143
148
return chunks
@@ -148,7 +153,10 @@ function splitFileToChunks(filePath: string, chunkSize: number): Chunk[] {
148
153
* by referencing open files, imported files and same package files.
149
154
*/
150
155
async function getRelevantCrossFiles ( editor : vscode . TextEditor , dependencyGraph : DependencyGraph ) : Promise < string [ ] > {
151
- const srcDependencies = await dependencyGraph . getSourceDependencies ( editor . document . uri , editor . document . getText ( ) )
156
+ const openedFilesInEditor = new Set ( getOpenFilesInWindow ( ) )
157
+
158
+ let srcDependencies = await dependencyGraph . getSourceDependencies ( editor . document . uri , editor . document . getText ( ) )
159
+ srcDependencies = moveToFront ( srcDependencies , openedFilesInEditor )
152
160
153
161
const samePackageFiles = await dependencyGraph . getSamePackageFiles (
154
162
editor . document . uri ,
@@ -158,21 +166,31 @@ async function getRelevantCrossFiles(editor: vscode.TextEditor, dependencyGraph:
158
166
return isRelevant ( editor . document . fileName , file , editor . document . languageId )
159
167
} )
160
168
161
- const relevantOpenFiles : vscode . Uri [ ] = await getRelevantFilesFromEditor (
162
- editor . document . fileName ,
163
- editor . document . languageId
164
- )
169
+ const mergedCrossFileList = [ ...new Set ( [ ...srcDependencies , ...samePackageRelevantFiles ] ) ]
165
170
166
- // We refer to only those open files which are in srcDependencies
167
- const filteredRelevantOpenFiles = relevantOpenFiles
168
- . filter ( file => srcDependencies . includes ( file . fsPath ) )
169
- . map ( file => file . fsPath )
171
+ return mergedCrossFileList
172
+ }
170
173
171
- const mergedCrossFileList = [
172
- ...new Set ( [ ...filteredRelevantOpenFiles , ...srcDependencies , ...samePackageRelevantFiles ] ) ,
173
- ]
174
/**
 * Returns a new array with every element contained in `picked` moved to the front.
 *
 * Relative order is preserved within both groups (picked and not picked), and the
 * input array is left untouched.
 *
 * @param arr items to reorder
 * @param picked items that should come first if they occur in `arr`
 * @returns a reordered copy of `arr`
 */
function moveToFront<T>(arr: readonly T[], picked: ReadonlySet<T>): T[] {
    const front: T[] = []
    const rest: T[] = []

    // Single stable pass: no comparator, no dependence on the engine's sort stability.
    for (const item of arr) {
        if (picked.has(item)) {
            front.push(item)
        } else {
            rest.push(item)
        }
    }

    return [...front, ...rest]
}
174
178
175
- return mergedCrossFileList
179
/**
 * Collects the file-system paths of all resources currently open as tabs in the window.
 *
 * Tabs whose input does not expose a `uri` are skipped, so one such tab no longer
 * aborts the whole collection.
 *
 * @returns file-system paths (`Uri.fsPath`) of the open tabs; empty when the tab API is unavailable
 */
function getOpenFilesInWindow(): string[] {
    const filesOpenedInEditor: string[] = []

    try {
        for (const tabGroup of vscode.window.tabGroups.all) {
            for (const tab of tabGroup.tabs) {
                // `tab.input` is loosely typed; only some tab kinds carry a single `uri`.
                const uri = (tab.input as { uri?: vscode.Uri } | undefined)?.uri
                if (uri !== undefined) {
                    // Use `fsPath` (not `path`) so the value is comparable with the
                    // file-system paths used elsewhere in this file, including on Windows.
                    filesOpenedInEditor.push(uri.fsPath)
                }
            }
        }
    } catch (e) {
        // Older versions of VSC do not have the tab API
    }

    return filesOpenedInEditor
}
177
195
178
196
function throwIfCancelled ( token : vscode . CancellationToken ) : void | never {
0 commit comments