Skip to content

Commit 8f7b53d

Browse files
committed
allow for empty #variable data
1 parent a1d5a4c commit 8f7b53d

File tree

7 files changed

+44
-193
lines changed

7 files changed

+44
-193
lines changed

src/vs/editor/test/common/utils/testDecoder.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,16 +200,16 @@ export class TestDecoder<T extends BaseToken, D extends BaseDecoder<T>> extends
200200
) {
201201
for (let i = 0; i < expectedTokens.length; i++) {
202202
const expectedToken = expectedTokens[i];
203-
const receivedtoken = receivedTokens[i];
203+
const receivedToken = receivedTokens[i];
204204

205205
assertDefined(
206-
receivedtoken,
206+
receivedToken,
207207
`Expected token '${i}' to be '${expectedToken}', got 'undefined'.`,
208208
);
209209

210210
assert(
211-
receivedtoken.equals(expectedToken),
212-
`Expected token '${i}' to be '${expectedToken}', got '${receivedtoken}'.`,
211+
receivedToken.equals(expectedToken),
212+
`Expected token '${i}' to be '${expectedToken}', got '${receivedToken}'.`,
213213
);
214214
}
215215

src/vs/workbench/contrib/chat/common/promptSyntax/codecs/chatPromptDecoder.ts

Lines changed: 1 addition & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -19,146 +19,6 @@ import { MarkdownDecoder, TMarkdownToken } from '../../../../../../editor/common
1919
*/
2020
export type TChatPromptToken = MarkdownLink | PromptVariable | PromptVariableWithData;
2121

22-
23-
// TODO: @lego - cleanup
24-
// /**
25-
// * The Parser responsible for processing a `prompt variable name` syntax from
26-
// * a sequence of tokens (e.g., `#variable:`).
27-
// *
28-
// * The parsing process starts with single `#` token, then can accept `file` word,
29-
// * followed by the `:` token, resulting in the tokens sequence equivalent to
30-
// * the `#file:` text sequence. In this successful case, the parser transitions into
31-
// * the {@linkcode PartialPromptFileReference} parser to continue the parsing process.
32-
// */
33-
// class PartialPromptVariableName extends ParserBase<TMarkdownToken, PartialPromptVariableName | PartialPromptFileReference | FileReference> {
34-
// constructor(token: Hash) {
35-
// super([token]);
36-
// }
37-
38-
// public accept(token: TMarkdownToken): TAcceptTokenResult<PartialPromptVariableName | PartialPromptFileReference | FileReference> {
39-
// // given we currently hold the `#` token, if we receive a `file` word,
40-
// // we can successfully proceed to the next token in the sequence
41-
// if (token instanceof Word) {
42-
// if (token.text === 'file') {
43-
// this.currentTokens.push(token);
44-
45-
// return {
46-
// result: 'success',
47-
// nextParser: this,
48-
// wasTokenConsumed: true,
49-
// };
50-
// }
51-
52-
// return {
53-
// result: 'failure',
54-
// wasTokenConsumed: false,
55-
// };
56-
// }
57-
58-
// // if we receive the `:` token, we can successfully proceed to the next
59-
// // token in the sequence `only if` the previous token was a `file` word
60-
// // therefore for currently tokens sequence equivalent to the `#file` text
61-
// if (token instanceof Colon) {
62-
// const lastToken = this.currentTokens[this.currentTokens.length - 1];
63-
64-
// if (lastToken instanceof Word) {
65-
// this.currentTokens.push(token);
66-
67-
// return {
68-
// result: 'success',
69-
// nextParser: new PartialPromptFileReference(this.currentTokens),
70-
// wasTokenConsumed: true,
71-
// };
72-
// }
73-
// }
74-
75-
// // all other cases are failures and we don't consume the offending token
76-
// return {
77-
// result: 'failure',
78-
// wasTokenConsumed: false,
79-
// };
80-
// }
81-
// }
82-
83-
// /**
84-
// * List of characters that stop a prompt variable sequence.
85-
// */
86-
// const PROMPT_FILE_REFERENCE_STOP_CHARACTERS: readonly string[] = [Space, Tab, CarriageReturn, NewLine, VerticalTab, FormFeed]
87-
// .map((token) => { return token.symbol; });
88-
89-
// /**
90-
// * Parser responsible for processing the `file reference` syntax part from
91-
// * a sequence of tokens (e.g., #variable:`./some/file/path.md`).
92-
// *
93-
// * The parsing process starts with the sequence of `#`, `file`, and `:` tokens,
94-
// * then can accept a sequence of tokens until one of the tokens defined in
95-
// * the {@linkcode PROMPT_FILE_REFERENCE_STOP_CHARACTERS} list is encountered.
96-
// * This sequence of tokens is treated as a `file path` part of the `#file:` variable,
97-
// * and in the successful case, the parser transitions into the {@linkcode FileReference}
98-
// * token which signifies the end of the file reference text parsing process.
99-
// */
100-
// class PartialPromptFileReference extends ParserBase<TMarkdownToken, PartialPromptFileReference | FileReference> {
101-
// /**
102-
// * Set of tokens that were accumulated so far.
103-
// */
104-
// private readonly fileReferenceTokens: (Hash | Word | Colon)[];
105-
106-
// constructor(tokens: (Hash | Word | Colon)[]) {
107-
// super([]);
108-
109-
// this.fileReferenceTokens = tokens;
110-
// }
111-
112-
// /**
113-
// * List of tokens that were accumulated so far.
114-
// */
115-
// public override get tokens(): readonly (Hash | Word | Colon)[] {
116-
// return [...this.fileReferenceTokens, ...this.currentTokens];
117-
// }
118-
119-
// /**
120-
// * Return the `FileReference` instance created from the current object.
121-
// */
122-
// public asFileReference(): FileReference {
123-
// // use only tokens in the `currentTokens` list to
124-
// // create the path component of the file reference
125-
// const path = this.currentTokens
126-
// .map((token) => { return token.text; })
127-
// .join('');
128-
129-
// const firstToken = this.tokens[0];
130-
131-
// const range = new Range(
132-
// firstToken.range.startLineNumber,
133-
// firstToken.range.startColumn,
134-
// firstToken.range.startLineNumber,
135-
// firstToken.range.startColumn + FileReference.TOKEN_START.length + path.length,
136-
// );
137-
138-
// return new FileReference(range, path);
139-
// }
140-
141-
// public accept(token: TMarkdownToken): TAcceptTokenResult<PartialPromptFileReference | FileReference> {
142-
// // any of stop characters is are breaking a prompt variable sequence
143-
// if (PROMPT_FILE_REFERENCE_STOP_CHARACTERS.includes(token.text)) {
144-
// return {
145-
// result: 'success',
146-
// wasTokenConsumed: false,
147-
// nextParser: this.asFileReference(),
148-
// };
149-
// }
150-
151-
// // any other token can be included in the sequence so accumulate
152-
// // it and continue with using the current parser instance
153-
// this.currentTokens.push(token);
154-
// return {
155-
// result: 'success',
156-
// wasTokenConsumed: true,
157-
// nextParser: this,
158-
// };
159-
// }
160-
// }
161-
16222
/**
16323
* Decoder for the common chatbot prompt message syntax.
16424
* For instance, the file references `#file:./path/file.md` are handled by this decoder.
@@ -180,7 +40,7 @@ export class ChatPromptDecoder extends BaseDecoder<TChatPromptToken, TMarkdownTo
18040
protected override onStreamData(token: TMarkdownToken): void {
18141
// prompt variables always start with the `#` character, hence
18242
// initiate a parser object if we encounter respective token and
183-
/// there is no active parser object present at the moment
43+
// there is no active parser object present at the moment
18444
if (token instanceof Hash && !this.current) {
18545
this.current = new PartialPromptVariableName(token);
18646

src/vs/workbench/contrib/chat/common/promptSyntax/codecs/parsers/promptVariableParser.ts

Lines changed: 12 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,6 @@ export class PartialPromptVariableName extends ParserBase<TSimpleToken, PartialP
128128
const firstToken = this.currentTokens[0];
129129
const lastToken = this.currentTokens[this.currentTokens.length - 1];
130130

131-
// TODO: @lego - validate that first and last tokens are defined?
132-
133131
// render the characters above into strings, excluding the starting `#` character
134132
const variableNameTokens = this.currentTokens.slice(1);
135133
const variableName = variableNameTokens.map(pick('text')).join('');
@@ -153,7 +151,7 @@ export class PartialPromptVariableName extends ParserBase<TSimpleToken, PartialP
153151
export class PartialPromptVariableWithData extends ParserBase<TSimpleToken, PartialPromptVariableWithData | PromptVariableWithData> {
154152

155153
constructor(tokens: readonly TSimpleToken[]) {
156-
const firstToken = tokens;
154+
const firstToken = tokens[0];
157155
const lastToken = tokens[tokens.length - 1];
158156

159157
// sanity checks of our expectations about the tokens list
@@ -180,46 +178,38 @@ export class PartialPromptVariableWithData extends ParserBase<TSimpleToken, Part
180178
// in either case, success or failure below, this is the end of the parsing process
181179
this.isConsumed = true;
182180

183-
// if no tokens received after initial set of tokens, fail
184-
if (this.currentTokens.length === this.startTokensCount) {
185-
return {
186-
result: 'failure',
187-
wasTokenConsumed: false,
188-
};
189-
}
181+
const firstToken = this.currentTokens[0];
182+
const lastToken = this.currentTokens[this.currentTokens.length - 1];
190183

191184
// tokens representing variable name without the `#` character at the start and
192185
// the `:` data separator character at the end
193186
const variableNameTokens = this.currentTokens.slice(1, this.startTokensCount - 1);
194187
// tokens representing variable data without the `:` separator character at the start
195188
const variableDataTokens = this.currentTokens.slice(this.startTokensCount);
189+
// compute the full range of the variable token
190+
const fullRange = new Range(
191+
firstToken.range.startLineNumber,
192+
firstToken.range.startColumn,
193+
lastToken.range.endLineNumber,
194+
lastToken.range.endColumn,
195+
);
196196

197197
// render the characters above into strings
198198
const variableName = variableNameTokens.map(pick('text')).join('');
199199
const variableData = variableDataTokens.map(pick('text')).join('');
200200

201-
const firstToken = this.currentTokens[0];
202-
const lastToken = this.currentTokens[this.currentTokens.length - 1];
203-
204-
// TODO: @lego - validate that first and last tokens are defined?
205-
206201
return {
207202
result: 'success',
208203
nextParser: new PromptVariableWithData(
209-
new Range(
210-
firstToken.range.startLineNumber,
211-
firstToken.range.startColumn,
212-
lastToken.range.endLineNumber,
213-
lastToken.range.endColumn,
214-
),
204+
fullRange,
215205
variableName,
216206
variableData,
217207
),
218208
wasTokenConsumed: false,
219209
};
220210
}
221211

222-
// otherwise, a valid data character - the data can contain almost any character,
212+
// otherwise, token is a valid data character - the data can contain almost any character,
223213
// including `:` and `#`, hence add it to the list of the current tokens and continue
224214
this.currentTokens.push(token);
225215

@@ -232,17 +222,8 @@ export class PartialPromptVariableWithData extends ParserBase<TSimpleToken, Part
232222

233223
/**
234224
* Try to convert the current parser instance into a fully-parsed {@link PromptVariableWithData} token.
235-
*
236-
* @throws if sequence of tokens received so far do not constitute a valid prompt variable with data.
237225
*/
238226
public asPromptVariableWithData(): PromptVariableWithData {
239-
// if no tokens received after initial set of tokens, fail
240-
// TODO: @lego - allow this to emit `#file:` tokens? (without path)
241-
assert(
242-
this.currentTokens.length > this.startTokensCount,
243-
`No 'data' part of the token found.`,
244-
);
245-
246227
// tokens representing variable name without the `#` character at the start and
247228
// the `:` data separator character at the end
248229
const variableNameTokens = this.currentTokens.slice(1, this.startTokensCount - 1);
@@ -256,8 +237,6 @@ export class PartialPromptVariableWithData extends ParserBase<TSimpleToken, Part
256237
const firstToken = this.currentTokens[0];
257238
const lastToken = this.currentTokens[this.currentTokens.length - 1];
258239

259-
// TODO: @lego - validate that first and last tokens are defined?
260-
261240
return new PromptVariableWithData(
262241
new Range(
263242
firstToken.range.startLineNumber,

src/vs/workbench/contrib/chat/common/promptSyntax/codecs/tokens/promptVariable.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
import { PromptToken } from './promptToken.js';
77
import { assert } from '../../../../../../../base/common/assert.js';
8-
import { INVALID_NAME_CHARACTERS, STOP_CHARACTERS } from '../parsers/promptVariableParser.js';
98
import { IRange, Range } from '../../../../../../../editor/common/core/range.js';
109
import { BaseToken } from '../../../../../../../editor/common/codecs/baseToken.js';
10+
import { INVALID_NAME_CHARACTERS, STOP_CHARACTERS } from '../parsers/promptVariableParser.js';
1111

1212
/**
1313
* All prompt variables start with `#` character.
@@ -30,15 +30,12 @@ export class PromptVariable extends PromptToken {
3030
*/
3131
public readonly name: string,
3232
) {
33+
// sanity check of characters used in the provided variable name
3334
for (const character of name) {
3435
assert(
35-
(INVALID_NAME_CHARACTERS.includes(character) === false),
36-
`File name cannot contain character '${character}', got '${name}'.`,
37-
);
38-
39-
assert(
36+
(INVALID_NAME_CHARACTERS.includes(character) === false) &&
4037
(STOP_CHARACTERS.includes(character) === false),
41-
`File name cannot contain character '${character}', got '${name}'.`,
38+
`Variable 'name' cannot contain character '${character}', got '${name}'.`,
4239
);
4340
}
4441

@@ -83,7 +80,6 @@ export class PromptVariable extends PromptToken {
8380
* Represents a {@link PromptVariable} with additional data token in a prompt text.
8481
* (e.g., `#variable:/path/to/file.md`)
8582
*/
86-
// TODO: @legomushroom - allow for empty `path`s?
8783
export class PromptVariableWithData extends PromptVariable {
8884
constructor(
8985
fullRange: Range,
@@ -98,6 +94,14 @@ export class PromptVariableWithData extends PromptVariable {
9894
public readonly data: string,
9995
) {
10096
super(fullRange, name);
97+
98+
// sanity check of characters used in the provided variable data
99+
for (const character of data) {
100+
assert(
101+
(STOP_CHARACTERS.includes(character) === false),
102+
`Variable 'data' cannot contain character '${character}', got '${data}'.`,
103+
);
104+
}
101105
}
102106

103107
/**

src/vs/workbench/contrib/chat/test/common/promptSyntax/codecs/chatPromptCodec.test.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ export class TestChatPromptCodec extends TestDecoder<TChatPromptToken, ChatPromp
4545
suite('ChatPromptCodec', () => {
4646
const testDisposables = ensureNoDisposablesAreLeakedInTestSuite();
4747

48-
test('produces expected tokens', async () => {
48+
test('produces expected tokens', async () => {
4949
const test = testDisposables.add(new TestChatPromptCodec());
5050

5151
await test.run(
52-
'#file:/etc/hosts some text\t\n for #file:./README.md\t testing\n ✔ purposes\n#file:LICENSE.md ✌ \t#file:.gitignore\n\n\n\t #file:/Users/legomushroom/repos/vscode ',
52+
'#file:/etc/hosts some text\t\n for #file:./README.md\t testing\n ✔ purposes\n#file:LICENSE.md ✌ \t#file:.gitignore\n\n\n\t #file:/Users/legomushroom/repos/vscode \n\nsomething #file:\tsomewhere\n',
5353
[
5454
new FileReference(
5555
new Range(1, 1, 1, 1 + 16),
@@ -71,6 +71,10 @@ suite('ChatPromptCodec', () => {
7171
new Range(7, 5, 7, 5 + 38),
7272
'/Users/legomushroom/repos/vscode',
7373
),
74+
new FileReference(
75+
new Range(9, 11, 9, 11 + 6),
76+
'',
77+
),
7478
],
7579
);
7680
});

src/vs/workbench/contrib/chat/test/common/promptSyntax/codecs/chatPromptDecoder.test.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ export class TestChatPromptDecoder extends TestDecoder<TChatPromptToken, ChatPro
4646
suite('ChatPromptDecoder', () => {
4747
const testDisposables = ensureNoDisposablesAreLeakedInTestSuite();
4848

49-
test('produces expected tokens', async () => {
49+
test('produces expected tokens', async () => {
5050
const test = testDisposables.add(
5151
new TestChatPromptDecoder(),
5252
);
@@ -59,6 +59,7 @@ suite('ChatPromptDecoder', () => {
5959
'## Heading Title',
6060
' \t#file:a/b/c/filename2.md\t🖖\t#file:other-file.md',
6161
' [#file:reference.md](./reference.md)some text #file:/some/file/with/absolute/path.md',
62+
'text text #file: another text',
6263
];
6364

6465
await test.run(
@@ -86,6 +87,10 @@ suite('ChatPromptDecoder', () => {
8687
new Range(7, 48, 7, 48 + 38),
8788
'/some/file/with/absolute/path.md',
8889
),
90+
new FileReference(
91+
new Range(8, 11, 8, 11 + 6),
92+
'',
93+
),
8994
],
9095
);
9196
});

0 commit comments

Comments
 (0)