Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ private void onDestroy() {
* Cached semantic token data associated with a document.
*
* @param uri URI of the document
* @param data token data list
* @param data token data as int array (more efficient than List<Integer>)
*/
private record CachedTokenData(URI uri, List<Integer> data) {
// NOTE(review): record with an array component — the compiler-generated equals/hashCode
// compare the array by reference, not by contents, and the array is stored and returned
// without a defensive copy. That is acceptable here because entries are only looked up by
// the cache key (resultId) and the visible callers (cacheTokenData/computeEdits) never
// mutate the array after caching — keep that invariant if usage grows.
private record CachedTokenData(URI uri, int[] data) {
}

/**
Expand All @@ -110,14 +110,14 @@ public SemanticTokens getSemanticTokensFull(
// Collect tokens from all suppliers in parallel
var entries = collectTokens(documentContext);

// Build delta-encoded data
List<Integer> data = toDeltaEncoded(entries);
// Build delta-encoded data as int array
int[] data = toDeltaEncodedArray(entries);

// Generate a unique resultId and cache the data
String resultId = generateResultId();
cacheTokenData(resultId, documentContext.getUri(), data);

return new SemanticTokens(resultId, data);
return new SemanticTokens(resultId, toList(data));
}

/**
Expand All @@ -137,16 +137,16 @@ public Either<SemanticTokens, SemanticTokensDelta> getSemanticTokensFullDelta(
// Collect tokens from all suppliers in parallel
var entries = collectTokens(documentContext);

// Build delta-encoded data
List<Integer> currentData = toDeltaEncoded(entries);
// Build delta-encoded data as int array
int[] currentData = toDeltaEncodedArray(entries);

// Generate new resultId
String resultId = generateResultId();

// If previous data is not available or belongs to a different document, return full tokens
if (previousData == null || !previousData.uri().equals(documentContext.getUri())) {
cacheTokenData(resultId, documentContext.getUri(), currentData);
return Either.forLeft(new SemanticTokens(resultId, currentData));
return Either.forLeft(new SemanticTokens(resultId, toList(currentData)));
}

// Compute delta edits
Expand Down Expand Up @@ -207,53 +207,159 @@ private static String generateResultId() {
/**
* Cache token data with the given resultId.
*/
private void cacheTokenData(String resultId, URI uri, List<Integer> data) {
private void cacheTokenData(String resultId, URI uri, int[] data) {
tokenCache.put(resultId, new CachedTokenData(uri, data));
}

/**
* Compute edits to transform previousData into currentData.
* Uses a simple algorithm that produces a single edit covering the entire change.
* <p>
* Учитывает структуру семантических токенов (группы по 5 элементов: deltaLine, deltaStart, length, type, modifiers)
* и смещение строк при вставке/удалении строк в документе.
*/
private static List<SemanticTokensEdit> computeEdits(List<Integer> previousData, List<Integer> currentData) {
// Find the first differing index
int minSize = Math.min(previousData.size(), currentData.size());
int prefixMatch = 0;
while (prefixMatch < minSize && previousData.get(prefixMatch).equals(currentData.get(prefixMatch))) {
prefixMatch++;
private static List<SemanticTokensEdit> computeEdits(int[] prev, int[] curr) {
final int TOKEN_SIZE = 5;
Copy link

Copilot AI Dec 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The constant TOKEN_SIZE is defined locally in multiple methods (computeEdits and findSuffixMatchWithOffset). Consider extracting this as a class-level private static final constant to avoid duplication and ensure consistency across methods. This would make the code more maintainable if the token structure ever changes.

Copilot uses AI. Check for mistakes.

int prevTokenCount = prev.length / TOKEN_SIZE;
int currTokenCount = curr.length / TOKEN_SIZE;

if (prevTokenCount == 0 && currTokenCount == 0) {
return List.of();
}

// If both are identical, return empty edits
if (prefixMatch == previousData.size() && prefixMatch == currentData.size()) {
// Находим первый отличающийся токен и одновременно вычисляем сумму deltaLine для prefix
int firstDiffToken = 0;
int prefixAbsLine = 0;
int minTokens = Math.min(prevTokenCount, currTokenCount);

outer:
for (int i = 0; i < minTokens; i++) {
int base = i * TOKEN_SIZE;
for (int j = 0; j < TOKEN_SIZE; j++) {
if (prev[base + j] != curr[base + j]) {
firstDiffToken = i;
break outer;
Comment on lines +235 to +241
Copy link

Copilot AI Dec 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The label name 'outer' is too generic and doesn't convey meaningful information about the loop's purpose. Consider using a more descriptive name like 'findFirstDiff' or 'searchPrefixMatch' to make the control flow clearer.

Suggested change
outer:
for (int i = 0; i < minTokens; i++) {
int base = i * TOKEN_SIZE;
for (int j = 0; j < TOKEN_SIZE; j++) {
if (prev[base + j] != curr[base + j]) {
firstDiffToken = i;
break outer;
findFirstDiff:
for (int i = 0; i < minTokens; i++) {
int base = i * TOKEN_SIZE;
for (int j = 0; j < TOKEN_SIZE; j++) {
if (prev[base + j] != curr[base + j]) {
firstDiffToken = i;
break findFirstDiff;

Copilot uses AI. Check for mistakes.
}
}
prefixAbsLine += prev[base]; // накапливаем deltaLine
firstDiffToken = i + 1;
}

// Если все токены одинаковые
if (firstDiffToken == minTokens && prevTokenCount == currTokenCount) {
return List.of();
}

// Find the last differing index (from the end)
int suffixMatch = 0;
while (suffixMatch < minSize - prefixMatch
&& previousData.get(previousData.size() - 1 - suffixMatch)
.equals(currentData.get(currentData.size() - 1 - suffixMatch))) {
suffixMatch++;
// Вычисляем смещение строк инкрементально от prefixAbsLine
int prevSuffixAbsLine = prefixAbsLine;
for (int i = firstDiffToken; i < prevTokenCount; i++) {
prevSuffixAbsLine += prev[i * TOKEN_SIZE];
}
int currSuffixAbsLine = prefixAbsLine;
for (int i = firstDiffToken; i < currTokenCount; i++) {
currSuffixAbsLine += curr[i * TOKEN_SIZE];
}
int lineOffset = currSuffixAbsLine - prevSuffixAbsLine;

// Находим последний отличающийся токен с учётом смещения строк
int suffixMatchTokens = findSuffixMatchWithOffset(prev, curr, firstDiffToken, lineOffset, TOKEN_SIZE);

// Вычисляем границы редактирования
int deleteEndToken = prevTokenCount - suffixMatchTokens;
int insertEndToken = currTokenCount - suffixMatchTokens;

// Calculate the range to replace
int deleteStart = prefixMatch;
int deleteCount = previousData.size() - prefixMatch - suffixMatch;
int insertEnd = currentData.size() - suffixMatch;
int deleteStart = firstDiffToken * TOKEN_SIZE;
int deleteCount = (deleteEndToken - firstDiffToken) * TOKEN_SIZE;
int insertEnd = insertEndToken * TOKEN_SIZE;

if (deleteCount == 0 && deleteStart == insertEnd) {
return List.of();
}

Comment on lines +275 to 278
Copy link

Copilot AI Dec 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition check at line 275 may be unnecessary rather than incorrect. When deleteCount is 0 and deleteStart equals insertEnd, there is nothing to delete and nothing to insert, so the early return merely avoids emitting a no-op edit. The edit creation below would naturally produce an edit with deleteCount=0 and empty insertData, which is semantically valid for LSP — so this early return is an optimization, not a correctness requirement, and could be removed without changing observable behavior.

Suggested change
if (deleteCount == 0 && deleteStart == insertEnd) {
return List.of();
}

Copilot uses AI. Check for mistakes.
// Extract the data to insert
List<Integer> insertData = currentData.subList(prefixMatch, insertEnd);
// Создаём список для вставки из среза массива
List<Integer> insertData = toList(Arrays.copyOfRange(curr, deleteStart, insertEnd));

var edit = new SemanticTokensEdit();
edit.setStart(deleteStart);
edit.setDeleteCount(deleteCount);
if (!insertData.isEmpty()) {
edit.setData(new ArrayList<>(insertData));
edit.setData(insertData);
}

return List.of(edit);
}
Comment on lines +220 to 290
Copy link

Copilot AI Dec 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test coverage for the new delta calculation algorithm should be expanded. The existing tests (deltaWithLineInsertedAtBeginning and deltaWithLineInsertedInMiddle) verify that the delta is smaller than the full data, but they don't verify the correctness of the actual edit content. Consider adding assertions to verify that applying the edit to the previous data actually produces the current data, to ensure the delta calculation is correct.

Copilot uses AI. Check for mistakes.

/**
* Находит количество совпадающих токенов с конца, учитывая смещение строк.
* <p>
* При дельта-кодировании токены после точки вставки идентичны,
* кроме первого токена, у которого deltaLine смещён на lineOffset.
* При вставке текста без перевода строки (lineOffset == 0), первый токен
* может иметь смещённый deltaStart.
*/
private static int findSuffixMatchWithOffset(int[] prev, int[] curr, int firstDiffToken, int lineOffset, int tokenSize) {
final int DELTA_LINE_INDEX = 0;
final int DELTA_START_INDEX = 1;

int prevTokenCount = prev.length / tokenSize;
int currTokenCount = curr.length / tokenSize;

int maxPrevSuffix = prevTokenCount - firstDiffToken;
int maxCurrSuffix = currTokenCount - firstDiffToken;
int maxSuffix = Math.min(maxPrevSuffix, maxCurrSuffix);

int suffixMatch = 0;
boolean foundBoundary = false;

for (int i = 0; i < maxSuffix; i++) {
int prevIdx = (prevTokenCount - 1 - i) * tokenSize;
int currIdx = (currTokenCount - 1 - i) * tokenSize;

// Для граничного токена при inline-редактировании (lineOffset == 0)
// разрешаем различие в deltaStart
int firstFieldToCheck = (!foundBoundary && lineOffset == 0) ? DELTA_START_INDEX + 1 : DELTA_START_INDEX;

// Проверяем поля кроме deltaLine (и возможно deltaStart для граничного токена)
boolean otherFieldsMatch = true;
for (int j = firstFieldToCheck; j < tokenSize; j++) {
if (prev[prevIdx + j] != curr[currIdx + j]) {
otherFieldsMatch = false;
break;
}
}

if (!otherFieldsMatch) {
break;
}

// Теперь проверяем deltaLine
int prevDeltaLine = prev[prevIdx + DELTA_LINE_INDEX];
int currDeltaLine = curr[currIdx + DELTA_LINE_INDEX];

if (prevDeltaLine == currDeltaLine) {
// Полное совпадение (или совпадение с учётом deltaStart при inline-редактировании)
suffixMatch++;
// Если это был граничный токен при inline-редактировании, отмечаем его найденным
if (!foundBoundary && lineOffset == 0) {
int prevDeltaStart = prev[prevIdx + DELTA_START_INDEX];
int currDeltaStart = curr[currIdx + DELTA_START_INDEX];
if (prevDeltaStart != currDeltaStart) {
foundBoundary = true;
}
}
} else if (!foundBoundary && currDeltaLine - prevDeltaLine == lineOffset) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot учитывает ли это вставку текста без перевода строки?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@coderabbitai ну или ты ответь

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nixel2007, да, код учитывает вставку текста без перевода строки, но обрабатывает её иначе, чем вставку/удаление целых строк.

При вставке текста на той же строке (без \n):

  • Токены на этой строке получат изменённые значения deltaStart (смещение позиции)
  • Токены на последующих строках сохранят те же deltaLine значения

Алгоритм обнаружит такие изменения через проверку всех полей токена в findSuffixMatchWithOffset:

// Сначала проверяем все поля кроме deltaLine
for (int j = 1; j < tokenSize; j++) {  // j=1 это deltaStart
  if (prev[prevIdx + j] != curr[currIdx + j]) {
    otherFieldsMatch = false;
    break;
  }
}

Если deltaStart отличается, токены не считаются совпадающими и попадут в область редактирования.

Оптимизация с lineOffset специально работает для вертикальных сдвигов (вставка/удаление строк), когда deltaLine изменяется, но сами токены остаются идентичными. Для горизонтальных сдвигов (вставка в той же строке) такой оптимизации нет, но это корректное поведение — позиции токенов действительно изменились и требуют обновления.

// Граничный токен — deltaLine отличается ровно на lineOffset
suffixMatch++;
foundBoundary = true;
} else {
// Не совпадает
break;
}
}

return suffixMatch;
}

/**
* Collect tokens from all suppliers in parallel using ForkJoinPool.
*/
Expand All @@ -269,7 +375,7 @@ private List<SemanticTokenEntry> collectTokens(DocumentContext documentContext)
.join();
}

private static List<Integer> toDeltaEncoded(List<SemanticTokenEntry> entries) {
private static int[] toDeltaEncodedArray(List<SemanticTokenEntry> entries) {
// de-dup and sort
Set<SemanticTokenEntry> uniq = new HashSet<>(entries);
List<SemanticTokenEntry> sorted = new ArrayList<>(uniq);
Expand Down Expand Up @@ -300,7 +406,10 @@ private static List<Integer> toDeltaEncoded(List<SemanticTokenEntry> entries) {
first = false;
}

// Convert to List<Integer> for LSP4J API
return Arrays.stream(data).boxed().toList();
return data;
}

/**
 * Boxes an int array into an unmodifiable {@code List<Integer>} for the LSP4J API.
 */
private static List<Integer> toList(int[] array) {
  List<Integer> boxed = new ArrayList<>(array.length);
  for (int value : array) {
    boxed.add(value);
  }
  // List.copyOf yields an unmodifiable list, matching Stream.toList() semantics.
  return List.copyOf(boxed);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,84 @@ void deltaWithLineInsertedInMiddle_shouldReturnOptimalDelta() {
assertThat(editSize).isLessThan(originalDataSize);
}

/**
 * Regression test for inline text insertion (no newline added), i.e. the lineOffset == 0
 * branch of the delta algorithm: the suffix-matching logic allows the boundary token's
 * deltaStart to differ, so tokens after the insertion point on the same line ("А" and ";")
 * are still matched as suffix instead of being re-sent.
 */
@Test
void deltaWithTextInsertedOnSameLine_shouldReturnOptimalDelta() {
// given - simulate inserting text on the same line without line breaks
// This tests the case raised by @nixel2007: text insertion without newline
String bsl1 = """
Перем А;
""";

String bsl2 = """
Перем Новая, А;
""";

DocumentContext context1 = TestUtils.getDocumentContext(bsl1);
referenceIndexFiller.fill(context1);
TextDocumentIdentifier textDocId1 = TestUtils.getTextDocumentIdentifier(context1.getUri());
SemanticTokens tokens1 = provider.getSemanticTokensFull(context1, new SemanticTokensParams(textDocId1));

// Verify original tokens structure
var decoded1 = decode(tokens1.getData());
var expected1 = List.of(
new ExpectedToken(0, 0, 5, SemanticTokenTypes.Keyword, "Перем"),
new ExpectedToken(0, 6, 1, SemanticTokenTypes.Variable, SemanticTokenModifiers.Definition, "А"),
new ExpectedToken(0, 7, 1, SemanticTokenTypes.Operator, ";")
);
assertTokensMatch(decoded1, expected1);

// Reuse context1's URI so the cached resultId from tokens1 is associated with the same document.
DocumentContext context2 = TestUtils.getDocumentContext(context1.getUri(), bsl2);
referenceIndexFiller.fill(context2);
SemanticTokens tokens2 = provider.getSemanticTokensFull(context2, new SemanticTokensParams(textDocId1));

// Verify modified tokens structure
var decoded2 = decode(tokens2.getData());
var expected2 = List.of(
new ExpectedToken(0, 0, 5, SemanticTokenTypes.Keyword, "Перем"),
new ExpectedToken(0, 6, 5, SemanticTokenTypes.Variable, SemanticTokenModifiers.Definition, "Новая"),
new ExpectedToken(0, 11, 1, SemanticTokenTypes.Operator, ","),
new ExpectedToken(0, 13, 1, SemanticTokenTypes.Variable, SemanticTokenModifiers.Definition, "А"),
new ExpectedToken(0, 14, 1, SemanticTokenTypes.Operator, ";")
);
assertTokensMatch(decoded2, expected2);

// when
// Request a delta against tokens1's resultId for the modified document.
var deltaParams = new SemanticTokensDeltaParams(textDocId1, tokens1.getResultId());
var result = provider.getSemanticTokensFullDelta(context2, deltaParams);

// then - should return delta, not full tokens
assertThat(result.isRight()).isTrue();
var delta = result.getRight();
assertThat(delta.getEdits()).isNotEmpty();
assertThat(delta.getEdits()).hasSize(1);

// Verify the delta edit details
// Original: [Перем, А, ;] - 3 tokens = 15 integers
// Modified: [Перем, Новая, ,, А, ;] - 5 tokens = 25 integers
//
// With lineOffset=0 inline edit handling:
// - Prefix match: "Перем" (1 token = 5 integers)
// - Suffix match: "А" and ";" (2 tokens = 10 integers)
// Note: "А" matches because the algorithm allows deltaStart to differ when lineOffset=0
// - Edit deletes: nothing (0 integers)
// - Edit inserts: "Новая" and "," (2 tokens = 10 integers)
var edit = delta.getEdits().get(0);
assertThat(edit.getStart())
.as("Edit should start after the prefix match (Перем = 5 integers)")
.isEqualTo(5);
assertThat(edit.getDeleteCount())
.as("Edit should delete nothing (suffix match includes А and ;)")
.isEqualTo(0);
assertThat(edit.getData())
.as("Edit should insert Новая and , tokens (2 tokens = 10 integers)")
.isNotNull()
.hasSize(10);

// Verify the edit is optimal (smaller than sending all new tokens)
int editSize = edit.getDeleteCount() + edit.getData().size();
assertThat(editSize).isLessThan(tokens2.getData().size());
}

// endregion
}

Loading