Skip to content

Commit ca3d6d4

Browse files
authored
feat(codewhisperer): crossfile js, ts jsx, tsx support #3668
* restructure crossfile/utg code path * add ts js tsx jsx crossfile support * cleanup test states in both beforeEach and afterEach in editorUtilities.test.ts
1 parent 1e1bce1 commit ca3d6d4

File tree

15 files changed

+480
-181
lines changed

15 files changed

+480
-181
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "Feature",
3+
"description": "CodeWhisperer: Improve Java suggestion quality with enhanced file context fetching"
4+
}

src/codewhisperer/models/constants.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ export const javascript = 'javascript'
7676

7777
export const typescript = 'typescript'
7878

79+
export const jsx = 'javascriptreact'
80+
81+
export const tsx = 'typescriptreact'
82+
7983
export const plaintext = 'plaintext'
8084

8185
// use vscode languageId here

src/codewhisperer/util/supplementalContext/codeParsingUtil.ts

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6+
import * as vscode from 'vscode'
67
import path = require('path')
78
import { DependencyGraph } from '../dependencyGraph/dependencyGraph'
8-
import * as vscode from 'vscode'
99

1010
export interface utgLanguageConfig {
1111
extension: string
@@ -68,29 +68,30 @@ export function countSubstringMatches(arr1: string[], arr2: string[]): number {
6868
return count
6969
}
7070

71-
export async function isTestFile(editor: vscode.TextEditor, dependencyGraph: DependencyGraph): Promise<boolean> {
72-
const languageConfig = utgLanguageConfigs[editor.document.languageId]
73-
if (!languageConfig) {
74-
// We have enabled the support only for python and Java for this check
75-
// as we depend on Regex for this validation.
76-
return false
77-
}
78-
79-
// TODO (Metrics): Add total number of calls to isTestFile
80-
if (isTestFileByName(editor.document.uri.fsPath, editor.document.languageId)) {
81-
return true
71+
export async function isTestFile(
72+
filePath: string,
73+
languageConfig: {
74+
languageId: vscode.TextDocument['languageId']
75+
dependencyGraph?: DependencyGraph
76+
fileContent?: string
8277
}
78+
): Promise<boolean> {
79+
const pathContainsTest = filePath.includes(`tests/`) || filePath.includes('test/') || filePath.includes('tst/')
80+
const fileNameMatchTestPatterns = isTestFileByName(filePath, languageConfig.languageId)
8381

84-
// TODO (Metrics): Add metrics for isTestFileByName Failure
85-
// (to help us determine if people follow naming conventions)
86-
if (await dependencyGraph.isTestFile(editor.document.getText())) {
82+
if (pathContainsTest || fileNameMatchTestPatterns) {
8783
return true
8884
}
8985

90-
return false
86+
// The dependency-graph check runs slowly, so it is executed lazily, only when the cheaper path/name checks did not match
87+
const fileHasTestDependency =
88+
languageConfig.dependencyGraph && languageConfig.fileContent
89+
? await languageConfig.dependencyGraph.isTestFile(languageConfig.fileContent)
90+
: false
91+
return fileHasTestDependency
9192
}
9293

93-
export function isTestFileByName(filePath: string, language: string): boolean {
94+
function isTestFileByName(filePath: string, language: vscode.TextDocument['languageId']): boolean {
9495
const languageConfig = utgLanguageConfigs[language]
9596
if (!languageConfig) {
9697
// We have enabled the support only for python and Java for this check
@@ -100,8 +101,6 @@ export function isTestFileByName(filePath: string, language: string): boolean {
100101
const testFilenamePattern = languageConfig.testFilenamePattern
101102

102103
const filename = path.basename(filePath)
103-
if (testFilenamePattern.test(filename)) {
104-
return true
105-
}
106-
return false
104+
105+
return testFilenamePattern.test(filename)
107106
}

src/codewhisperer/util/supplementalContext/crossFileContextUtil.ts

Lines changed: 74 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,40 @@
55

66
import * as vscode from 'vscode'
77
import * as fs from 'fs-extra'
8-
import { DependencyGraph } from '../dependencyGraph/dependencyGraph'
8+
import path = require('path')
99
import { BM25Document, BM25Okapi } from './rankBm25'
10-
import { isRelevant } from './editorFilesUtil'
1110
import { ToolkitError } from '../../../shared/errors'
1211
import { UserGroup, crossFileContextConfig, supplemetalContextFetchingTimeoutMsg } from '../../models/constants'
1312
import { CancellationError } from '../../../shared/utilities/timeoutUtils'
1413
import { CodeWhispererSupplementalContextItem } from './supplementalContextUtil'
1514
import { CodeWhispererUserGroupSettings } from '../userGroupUtil'
15+
import { isTestFile } from './codeParsingUtil'
16+
import { getOpenFilesInWindow } from '../../../shared/utilities/editorUtilities'
17+
18+
type CrossFileSupportedLanguage =
19+
| 'java'
20+
| 'python'
21+
| 'javascript'
22+
| 'typescript'
23+
| 'javascriptreact'
24+
| 'typescriptreact'
25+
26+
// TODO: Find a cleaner way to declare this; manually typing language ids such as 'java' or 'javascriptreact' is error-prone
27+
// TODO: Move to another config file or constants file
28+
// Supported language to its corresponding file ext
29+
const supportedLanguageToDialects: Readonly<Record<CrossFileSupportedLanguage, Set<string>>> = {
30+
java: new Set<string>(['.java']),
31+
python: new Set<string>(['.py']),
32+
javascript: new Set<string>(['.js', '.jsx']),
33+
javascriptreact: new Set<string>(['.js', '.jsx']),
34+
typescript: new Set<string>(['.ts', '.tsx']),
35+
typescriptreact: new Set<string>(['.ts', '.tsx']),
36+
}
37+
38+
function isCrossFileSupported(languageId: string): languageId is CrossFileSupportedLanguage {
39+
return Object.keys(supportedLanguageToDialects).includes(languageId)
40+
}
1641

17-
const crossFileLanguageConfigs = ['java']
1842
interface Chunk {
1943
fileName: string
2044
content: string
@@ -24,20 +48,21 @@ interface Chunk {
2448

2549
export async function fetchSupplementalContextForSrc(
2650
editor: vscode.TextEditor,
27-
dependencyGraph: DependencyGraph,
2851
cancellationToken: vscode.CancellationToken
2952
): Promise<CodeWhispererSupplementalContextItem[] | undefined> {
30-
if (crossFileLanguageConfigs.includes(editor.document.languageId) === false) {
31-
return undefined
32-
}
53+
const shouldProceed = shouldFetchCrossFileContext(
54+
editor.document.languageId,
55+
CodeWhispererUserGroupSettings.instance.userGroup
56+
)
3357

34-
if (CodeWhispererUserGroupSettings.instance.userGroup !== UserGroup.CrossFile) {
35-
return []
58+
if (!shouldProceed) {
59+
return shouldProceed === undefined ? undefined : []
3660
}
3761

3862
// Step 1: Get relevant cross files to refer
39-
const relevantCrossFilePaths = await getRelevantCrossFiles(editor, dependencyGraph)
63+
const relevantCrossFilePaths = await getCrossFileCandidates(editor)
4064
throwIfCancelled(cancellationToken)
65+
4166
// Step 2: Split files to chunks with upper bound on chunkCount
4267
// We restrict the total number of chunks to improve on latency.
4368
// Chunk linking is required as we want to pass the next chunk value for matched chunk.
@@ -110,6 +135,27 @@ function getInputChunk(editor: vscode.TextEditor, chunkSize: number) {
110135
return inputChunk
111136
}
112137

138+
/**
139+
* Util to decide if we need to fetch crossfile context since CodeWhisperer CrossFile Context feature is gated by userGroup and language level
140+
* @param languageId: VSCode language Identifier
141+
* @param userGroup: CodeWhisperer user group settings, refer to userGroupUtil.ts
142+
* @returns undefined specifically when the language is not supported,
143+
* otherwise true if the language is 'java' or the user belongs to the CrossFile user group, and false if neither holds
144+
*/
145+
function shouldFetchCrossFileContext(languageId: string, userGroup: UserGroup): boolean | undefined {
146+
if (!isCrossFileSupported(languageId)) {
147+
return undefined
148+
}
149+
150+
if (languageId === 'java') {
151+
return true
152+
} else if (supportedLanguageToDialects[languageId] && userGroup === UserGroup.CrossFile) {
153+
return true
154+
} else {
155+
return false
156+
}
157+
}
158+
113159
/**
114160
* This linking is required from science experimentations to pass the next content chunk
115161
* when a given chunk context passes the match in BM25.
@@ -158,29 +204,27 @@ function splitFileToChunks(filePath: string, chunkSize: number): Chunk[] {
158204
* This function will return relevant cross files for the given editor file
159205
* by referencing the files currently open in the editor window.
160206
*/
161-
async function getRelevantCrossFiles(editor: vscode.TextEditor, dependencyGraph: DependencyGraph): Promise<string[]> {
162-
return getOpenFilesInWindow().filter(file => {
163-
return isRelevant(editor.document.fileName, file, editor.document.languageId)
207+
async function getCrossFileCandidates(editor: vscode.TextEditor): Promise<string[]> {
208+
const targetFile = editor.document.uri.fsPath
209+
const language = editor.document.languageId as CrossFileSupportedLanguage
210+
const dialects = supportedLanguageToDialects[language]
211+
212+
/**
213+
* Consider a file which
214+
* 1. is different from the target
215+
* 2. has the same file extension, or is one of the dialects of the target file's language (e.g. .js vs. .jsx)
216+
* 3. is not a test file
217+
*/
218+
return await getOpenFilesInWindow(async candidateFile => {
219+
return (
220+
targetFile !== candidateFile &&
221+
(path.extname(targetFile) === path.extname(candidateFile) ||
222+
(dialects && dialects.has(path.extname(candidateFile)))) &&
223+
!(await isTestFile(candidateFile, { languageId: language }))
224+
)
164225
})
165226
}
166227

167-
function getOpenFilesInWindow(): string[] {
168-
const filesOpenedInEditor: string[] = []
169-
170-
try {
171-
const tabArrays = vscode.window.tabGroups.all
172-
tabArrays.forEach(tabArray => {
173-
tabArray.tabs.forEach(tab => {
174-
filesOpenedInEditor.push((tab.input as any).uri.path)
175-
})
176-
})
177-
} catch (e) {
178-
// Older versions of VSC do not have the tab API
179-
}
180-
181-
return filesOpenedInEditor
182-
}
183-
184228
function throwIfCancelled(token: vscode.CancellationToken): void | never {
185229
if (token.isCancellationRequested) {
186230
throw new ToolkitError(supplemetalContextFetchingTimeoutMsg, { cause: new CancellationError('timeout') })

src/codewhisperer/util/supplementalContext/editorFilesUtil.ts

Lines changed: 0 additions & 55 deletions
This file was deleted.

src/codewhisperer/util/supplementalContext/supplementalContextUtil.ts

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,18 @@ export async function fetchSupplementalContext(
3535
const timesBeforeFetching = performance.now()
3636
const dependencyGraph = DependencyGraphFactory.getDependencyGraph(editor.document.languageId)
3737

38-
if (dependencyGraph === undefined) {
39-
// This is a general check for language support of CW.
40-
// We perform feature level language filtering later.
41-
return undefined
42-
}
38+
const isUtg = await isTestFile(editor.document.uri.fsPath, {
39+
languageId: editor.document.languageId,
40+
dependencyGraph: dependencyGraph,
41+
fileContent: editor.document.getText(),
42+
})
4343

44-
const isUtg = await isTestFile(editor, dependencyGraph)
4544
let supplementalContextPromise: Promise<CodeWhispererSupplementalContextItem[] | undefined>
4645

4746
if (isUtg) {
48-
supplementalContextPromise = fetchSupplementalContextForTest(editor, dependencyGraph, cancellationToken)
47+
supplementalContextPromise = fetchSupplementalContextForTest(editor, cancellationToken)
4948
} else {
50-
supplementalContextPromise = fetchSupplementalContextForSrc(editor, dependencyGraph, cancellationToken)
49+
supplementalContextPromise = fetchSupplementalContextForSrc(editor, cancellationToken)
5150
}
5251

5352
return supplementalContextPromise

0 commit comments

Comments
 (0)