Commit 116269d

Merge pull request #3706 from 1c-syntax/feature/optimize-delta-semantic-tokens
Optimize semantic token storage, fix delta computation
2 parents 6e09504 + 9ee333e commit 116269d

2 files changed: +221 -34 lines changed


src/main/java/com/github/_1c_syntax/bsl/languageserver/providers/SemanticTokensProvider.java

Lines changed: 143 additions & 34 deletions
@@ -91,9 +91,9 @@ private void onDestroy() {
    * Cached semantic token data associated with a document.
    *
    * @param uri URI of the document
-   * @param data token data list
+   * @param data token data as int array (more efficient than List<Integer>)
    */
-  private record CachedTokenData(URI uri, List<Integer> data) {
+  private record CachedTokenData(URI uri, int[] data) {
   }
 
   /**
@@ -110,14 +110,14 @@ public SemanticTokens getSemanticTokensFull(
     // Collect tokens from all suppliers in parallel
     var entries = collectTokens(documentContext);
 
-    // Build delta-encoded data
-    List<Integer> data = toDeltaEncoded(entries);
+    // Build delta-encoded data as int array
+    int[] data = toDeltaEncodedArray(entries);
 
     // Generate a unique resultId and cache the data
     String resultId = generateResultId();
     cacheTokenData(resultId, documentContext.getUri(), data);
 
-    return new SemanticTokens(resultId, data);
+    return new SemanticTokens(resultId, toList(data));
   }
 
   /**
@@ -137,16 +137,16 @@ public Either<SemanticTokens, SemanticTokensDelta> getSemanticTokensFullDelta(
     // Collect tokens from all suppliers in parallel
     var entries = collectTokens(documentContext);
 
-    // Build delta-encoded data
-    List<Integer> currentData = toDeltaEncoded(entries);
+    // Build delta-encoded data as int array
+    int[] currentData = toDeltaEncodedArray(entries);
 
     // Generate new resultId
     String resultId = generateResultId();
 
     // If previous data is not available or belongs to a different document, return full tokens
     if (previousData == null || !previousData.uri().equals(documentContext.getUri())) {
       cacheTokenData(resultId, documentContext.getUri(), currentData);
-      return Either.forLeft(new SemanticTokens(resultId, currentData));
+      return Either.forLeft(new SemanticTokens(resultId, toList(currentData)));
     }
 
     // Compute delta edits
@@ -207,53 +207,159 @@ private static String generateResultId() {
   /**
    * Cache token data with the given resultId.
    */
-  private void cacheTokenData(String resultId, URI uri, List<Integer> data) {
+  private void cacheTokenData(String resultId, URI uri, int[] data) {
     tokenCache.put(resultId, new CachedTokenData(uri, data));
   }
 
   /**
    * Compute edits to transform previousData into currentData.
-   * Uses a simple algorithm that produces a single edit covering the entire change.
+   * <p>
+   * Takes into account the structure of semantic tokens (groups of 5 elements:
+   * deltaLine, deltaStart, length, type, modifiers) and the line offset caused
+   * by inserting or deleting lines in the document.
    */
-  private static List<SemanticTokensEdit> computeEdits(List<Integer> previousData, List<Integer> currentData) {
-    // Find the first differing index
-    int minSize = Math.min(previousData.size(), currentData.size());
-    int prefixMatch = 0;
-    while (prefixMatch < minSize && previousData.get(prefixMatch).equals(currentData.get(prefixMatch))) {
-      prefixMatch++;
+  private static List<SemanticTokensEdit> computeEdits(int[] prev, int[] curr) {
+    final int TOKEN_SIZE = 5;
+
+    int prevTokenCount = prev.length / TOKEN_SIZE;
+    int currTokenCount = curr.length / TOKEN_SIZE;
+
+    if (prevTokenCount == 0 && currTokenCount == 0) {
+      return List.of();
     }
 
-    // If both are identical, return empty edits
-    if (prefixMatch == previousData.size() && prefixMatch == currentData.size()) {
+    // Find the first differing token, accumulating the sum of deltaLine over the prefix as we go
+    int firstDiffToken = 0;
+    int prefixAbsLine = 0;
+    int minTokens = Math.min(prevTokenCount, currTokenCount);
+
+    outer:
+    for (int i = 0; i < minTokens; i++) {
+      int base = i * TOKEN_SIZE;
+      for (int j = 0; j < TOKEN_SIZE; j++) {
+        if (prev[base + j] != curr[base + j]) {
+          firstDiffToken = i;
+          break outer;
+        }
+      }
+      prefixAbsLine += prev[base]; // accumulate deltaLine
+      firstDiffToken = i + 1;
+    }
+
+    // All tokens are identical
+    if (firstDiffToken == minTokens && prevTokenCount == currTokenCount) {
       return List.of();
     }
 
-    // Find the last differing index (from the end)
-    int suffixMatch = 0;
-    while (suffixMatch < minSize - prefixMatch
-      && previousData.get(previousData.size() - 1 - suffixMatch)
-        .equals(currentData.get(currentData.size() - 1 - suffixMatch))) {
-      suffixMatch++;
+    // Compute the line offset incrementally, starting from prefixAbsLine
+    int prevSuffixAbsLine = prefixAbsLine;
+    for (int i = firstDiffToken; i < prevTokenCount; i++) {
+      prevSuffixAbsLine += prev[i * TOKEN_SIZE];
     }
+    int currSuffixAbsLine = prefixAbsLine;
+    for (int i = firstDiffToken; i < currTokenCount; i++) {
+      currSuffixAbsLine += curr[i * TOKEN_SIZE];
+    }
+    int lineOffset = currSuffixAbsLine - prevSuffixAbsLine;
+
+    // Find the last differing token, taking the line offset into account
+    int suffixMatchTokens = findSuffixMatchWithOffset(prev, curr, firstDiffToken, lineOffset, TOKEN_SIZE);
+
+    // Compute the edit boundaries
+    int deleteEndToken = prevTokenCount - suffixMatchTokens;
+    int insertEndToken = currTokenCount - suffixMatchTokens;
 
-    // Calculate the range to replace
-    int deleteStart = prefixMatch;
-    int deleteCount = previousData.size() - prefixMatch - suffixMatch;
-    int insertEnd = currentData.size() - suffixMatch;
+    int deleteStart = firstDiffToken * TOKEN_SIZE;
+    int deleteCount = (deleteEndToken - firstDiffToken) * TOKEN_SIZE;
+    int insertEnd = insertEndToken * TOKEN_SIZE;
+
+    if (deleteCount == 0 && deleteStart == insertEnd) {
+      return List.of();
+    }
 
-    // Extract the data to insert
-    List<Integer> insertData = currentData.subList(prefixMatch, insertEnd);
+    // Build the list to insert from a slice of the array
+    List<Integer> insertData = toList(Arrays.copyOfRange(curr, deleteStart, insertEnd));
 
     var edit = new SemanticTokensEdit();
     edit.setStart(deleteStart);
     edit.setDeleteCount(deleteCount);
     if (!insertData.isEmpty()) {
-      edit.setData(new ArrayList<>(insertData));
+      edit.setData(insertData);
     }
 
     return List.of(edit);
   }
 
+  /**
+   * Finds the number of matching tokens from the end, taking the line offset into account.
+   * <p>
+   * With delta encoding, the tokens after the insertion point are identical
+   * except for the first one, whose deltaLine is shifted by lineOffset.
+   * When text is inserted without a line break (lineOffset == 0), the first token
+   * may have a shifted deltaStart instead.
+   */
+  private static int findSuffixMatchWithOffset(int[] prev, int[] curr, int firstDiffToken, int lineOffset, int tokenSize) {
+    final int DELTA_LINE_INDEX = 0;
+    final int DELTA_START_INDEX = 1;
+
+    int prevTokenCount = prev.length / tokenSize;
+    int currTokenCount = curr.length / tokenSize;
+
+    int maxPrevSuffix = prevTokenCount - firstDiffToken;
+    int maxCurrSuffix = currTokenCount - firstDiffToken;
+    int maxSuffix = Math.min(maxPrevSuffix, maxCurrSuffix);
+
+    int suffixMatch = 0;
+    boolean foundBoundary = false;
+
+    for (int i = 0; i < maxSuffix; i++) {
+      int prevIdx = (prevTokenCount - 1 - i) * tokenSize;
+      int currIdx = (currTokenCount - 1 - i) * tokenSize;
+
+      // For the boundary token of an inline edit (lineOffset == 0),
+      // allow deltaStart to differ
+      int firstFieldToCheck = (!foundBoundary && lineOffset == 0) ? DELTA_START_INDEX + 1 : DELTA_START_INDEX;
+
+      // Check all fields except deltaLine (and possibly deltaStart for the boundary token)
+      boolean otherFieldsMatch = true;
+      for (int j = firstFieldToCheck; j < tokenSize; j++) {
+        if (prev[prevIdx + j] != curr[currIdx + j]) {
+          otherFieldsMatch = false;
+          break;
+        }
+      }
+
+      if (!otherFieldsMatch) {
+        break;
+      }
+
+      // Now check deltaLine
+      int prevDeltaLine = prev[prevIdx + DELTA_LINE_INDEX];
+      int currDeltaLine = curr[currIdx + DELTA_LINE_INDEX];
+
+      if (prevDeltaLine == currDeltaLine) {
+        // Full match (or a match modulo deltaStart for an inline edit)
+        suffixMatch++;
+        // If this was the boundary token of an inline edit, mark it as found
+        if (!foundBoundary && lineOffset == 0) {
+          int prevDeltaStart = prev[prevIdx + DELTA_START_INDEX];
+          int currDeltaStart = curr[currIdx + DELTA_START_INDEX];
+          if (prevDeltaStart != currDeltaStart) {
+            foundBoundary = true;
+          }
+        }
+      } else if (!foundBoundary && currDeltaLine - prevDeltaLine == lineOffset) {
+        // Boundary token: deltaLine differs by exactly lineOffset
+        suffixMatch++;
+        foundBoundary = true;
+      } else {
+        // No match
+        break;
+      }
+    }
+
+    return suffixMatch;
+  }
+
   /**
    * Collect tokens from all suppliers in parallel using ForkJoinPool.
    */
@@ -269,7 +375,7 @@ private List<SemanticTokenEntry> collectTokens(DocumentContext documentContext)
       .join();
   }
 
-  private static List<Integer> toDeltaEncoded(List<SemanticTokenEntry> entries) {
+  private static int[] toDeltaEncodedArray(List<SemanticTokenEntry> entries) {
     // de-dup and sort
     Set<SemanticTokenEntry> uniq = new HashSet<>(entries);
     List<SemanticTokenEntry> sorted = new ArrayList<>(uniq);
@@ -300,7 +406,10 @@ private static List<Integer> toDeltaEncoded(List<SemanticTokenEntry> entries) {
       first = false;
     }
 
-    // Convert to List<Integer> for LSP4J API
-    return Arrays.stream(data).boxed().toList();
+    return data;
+  }
+
+  private static List<Integer> toList(int[] array) {
+    return Arrays.stream(array).boxed().toList();
   }
 }
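Why computeEdits tracks a line offset: with delta encoding, inserting or deleting whole lines changes only the deltaLine of the first token after the edit point (the boundary token); every later token keeps its exact five-integer encoding. A plain element-wise suffix comparison would stop at that boundary token and report a much larger edit than necessary. An illustrative sketch (token values follow the encoding sketch above and are not taken from the provider):

    // "Перем А;" with one blank line inserted above it: only the first token's
    // deltaLine absorbs the shift (lineOffset = 1); everything after it is
    // byte-for-byte identical.
    int[] before = {0, 0, 5, 0, 0,   0, 6, 1, 1, 0,   0, 1, 1, 2, 0};
    int[] after  = {1, 0, 5, 0, 0,   0, 6, 1, 1, 0,   0, 1, 1, 2, 0};
    // findSuffixMatchWithOffset recognizes the boundary token by checking
    // currDeltaLine - prevDeltaLine == lineOffset, so the suffix match can
    // extend past it instead of stopping at the first unequal integer.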

src/test/java/com/github/_1c_syntax/bsl/languageserver/providers/SemanticTokensProviderTest.java

Lines changed: 78 additions & 0 deletions
@@ -1341,6 +1341,84 @@ void deltaWithLineInsertedInMiddle_shouldReturnOptimalDelta() {
     assertThat(editSize).isLessThan(originalDataSize);
   }
 
+  @Test
+  void deltaWithTextInsertedOnSameLine_shouldReturnOptimalDelta() {
+    // given - simulate inserting text on the same line, without line breaks
+    // This tests the case raised by @nixel2007: text insertion without a newline
+    String bsl1 = """
+      Перем А;
+      """;
+
+    String bsl2 = """
+      Перем Новая, А;
+      """;
+
+    DocumentContext context1 = TestUtils.getDocumentContext(bsl1);
+    referenceIndexFiller.fill(context1);
+    TextDocumentIdentifier textDocId1 = TestUtils.getTextDocumentIdentifier(context1.getUri());
+    SemanticTokens tokens1 = provider.getSemanticTokensFull(context1, new SemanticTokensParams(textDocId1));
+
+    // Verify the original token structure
+    var decoded1 = decode(tokens1.getData());
+    var expected1 = List.of(
+      new ExpectedToken(0, 0, 5, SemanticTokenTypes.Keyword, "Перем"),
+      new ExpectedToken(0, 6, 1, SemanticTokenTypes.Variable, SemanticTokenModifiers.Definition, "А"),
+      new ExpectedToken(0, 7, 1, SemanticTokenTypes.Operator, ";")
+    );
+    assertTokensMatch(decoded1, expected1);
+
+    DocumentContext context2 = TestUtils.getDocumentContext(context1.getUri(), bsl2);
+    referenceIndexFiller.fill(context2);
+    SemanticTokens tokens2 = provider.getSemanticTokensFull(context2, new SemanticTokensParams(textDocId1));
+
+    // Verify the modified token structure
+    var decoded2 = decode(tokens2.getData());
+    var expected2 = List.of(
+      new ExpectedToken(0, 0, 5, SemanticTokenTypes.Keyword, "Перем"),
+      new ExpectedToken(0, 6, 5, SemanticTokenTypes.Variable, SemanticTokenModifiers.Definition, "Новая"),
+      new ExpectedToken(0, 11, 1, SemanticTokenTypes.Operator, ","),
+      new ExpectedToken(0, 13, 1, SemanticTokenTypes.Variable, SemanticTokenModifiers.Definition, "А"),
+      new ExpectedToken(0, 14, 1, SemanticTokenTypes.Operator, ";")
+    );
+    assertTokensMatch(decoded2, expected2);
+
+    // when
+    var deltaParams = new SemanticTokensDeltaParams(textDocId1, tokens1.getResultId());
+    var result = provider.getSemanticTokensFullDelta(context2, deltaParams);
+
+    // then - should return a delta, not full tokens
+    assertThat(result.isRight()).isTrue();
+    var delta = result.getRight();
+    assertThat(delta.getEdits()).isNotEmpty();
+    assertThat(delta.getEdits()).hasSize(1);
+
+    // Verify the delta edit details
+    // Original: [Перем, А, ;] - 3 tokens = 15 integers
+    // Modified: [Перем, Новая, ,, А, ;] - 5 tokens = 25 integers
+    //
+    // With lineOffset == 0 inline edit handling:
+    // - Prefix match: "Перем" (1 token = 5 integers)
+    // - Suffix match: "А" and ";" (2 tokens = 10 integers)
+    //   Note: "А" matches because the algorithm allows deltaStart to differ when lineOffset == 0
+    // - Edit deletes: nothing (0 integers)
+    // - Edit inserts: "Новая" and "," (2 tokens = 10 integers)
+    var edit = delta.getEdits().get(0);
+    assertThat(edit.getStart())
+      .as("Edit should start after the prefix match (Перем = 5 integers)")
+      .isEqualTo(5);
+    assertThat(edit.getDeleteCount())
+      .as("Edit should delete nothing (the suffix match includes А and ;)")
+      .isEqualTo(0);
+    assertThat(edit.getData())
+      .as("Edit should insert the Новая and , tokens (2 tokens = 10 integers)")
+      .isNotNull()
+      .hasSize(10);
+
+    // Verify the edit is optimal (smaller than sending all new tokens)
+    int editSize = edit.getDeleteCount() + edit.getData().size();
+    assertThat(editSize).isLessThan(tokens2.getData().size());
+  }
+
   // endregion
 }
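For reference, a client applies a SemanticTokensEdit by splicing the flat integer array: it removes deleteCount integers at position start and inserts data in their place. A minimal sketch of that splice (the helper class below is illustrative only and is not part of this PR):

    import java.util.ArrayList;
    import java.util.List;

    final class SemanticTokensEditApplier {

      // Applies a single (start, deleteCount, data) edit to the previously
      // received token array, as a client handling a
      // textDocument/semanticTokens/full/delta response would.
      static List<Integer> apply(List<Integer> previous, int start, int deleteCount, List<Integer> data) {
        var result = new ArrayList<Integer>(previous.subList(0, start));
        result.addAll(data);
        result.addAll(previous.subList(start + deleteCount, previous.size()));
        return result;
      }
    }

Applied to the test above: splicing the 10 inserted integers into the 15-integer array at start 5 with deleteCount 0 reproduces the 25-integer array of the modified document.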