Skip to content

Commit 5182468

Browse files
SONARPY-1236 Use protobuf to cache CPD tokens (#1331)
1 parent 20f7255 commit 5182468

File tree

4 files changed

+43
-18
lines changed

4 files changed

+43
-18
lines changed

python-frontend/src/main/java/org/sonar/python/caching/CpdSerializer.java

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -19,24 +19,20 @@
1919
*/
2020
package org.sonar.python.caching;
2121

22-
import java.io.ByteArrayInputStream;
23-
import java.io.ByteArrayOutputStream;
2422
import java.io.IOException;
25-
import java.io.ObjectInputStream;
26-
import java.io.ObjectOutputStream;
27-
import java.io.Serializable;
2823
import java.util.List;
2924
import java.util.stream.Collectors;
3025
import org.sonar.plugins.python.api.tree.Token;
3126
import org.sonar.python.TokenLocation;
27+
import org.sonar.python.types.protobuf.CpdTokenProtos;
3228

3329
public class CpdSerializer {
3430

3531
private CpdSerializer() {
3632
// Prevent instantiation
3733
}
3834

39-
public static final class TokenInfo implements Serializable {
35+
public static final class TokenInfo {
4036
public final int startLine;
4137
public final int startLineOffset;
4238
public final int endLine;
@@ -58,19 +54,29 @@ public TokenInfo(int startLine, int startLineOffset, int endLine, int endLineOff
5854
}
5955

6056
public static byte[] toBytes(List<Token> tokens) throws IOException {
61-
List<TokenInfo> tokenInfos = tokens.stream()
62-
.map(TokenInfo::from)
63-
.collect(Collectors.toList());
57+
CpdTokenProtos.FileCpdTokens.Builder builder = CpdTokenProtos.FileCpdTokens.newBuilder();
6458

65-
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
66-
ObjectOutputStream objectOutputStream = new ObjectOutputStream(byteArrayOutputStream);
67-
objectOutputStream.writeObject(tokenInfos);
59+
for (Token token : tokens) {
60+
TokenLocation location = new TokenLocation(token);
61+
CpdTokenProtos.Token protoToken = CpdTokenProtos.Token.newBuilder()
62+
.setValue(token.value())
63+
.setStartLine(location.startLine())
64+
.setStartLineOffset(location.startLineOffset())
65+
.setEndLine(location.endLine())
66+
.setEndLineOffset(location.endLineOffset())
67+
.build();
6868

69-
return byteArrayOutputStream.toByteArray();
69+
builder.addTokens(protoToken);
70+
}
71+
72+
return builder.build().toByteArray();
7073
}
7174

72-
public static List<TokenInfo> fromBytes(byte[] bytes) throws IOException, ClassNotFoundException {
73-
ObjectInputStream objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytes));
74-
return (List<TokenInfo>) objectInputStream.readObject();
75+
public static List<TokenInfo> fromBytes(byte[] bytes) throws IOException {
76+
return CpdTokenProtos.FileCpdTokens.parseFrom(bytes)
77+
.getTokensList()
78+
.stream()
79+
.map(proto -> new TokenInfo(proto.getStartLine(), proto.getStartLineOffset(), proto.getEndLine(), proto.getEndLineOffset(), proto.getValue()))
80+
.collect(Collectors.toList());
7581
}
7682
}
Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
syntax = "proto3";
2+
3+
// Not necessary for Python but should still be declared to avoid name collisions
4+
// in the Protocol Buffers namespace and non-Python languages
5+
package protoblog;
6+
option java_outer_classname = "CpdTokenProtos";
7+
option java_package = "org.sonar.python.types.protobuf";
8+
9+
message Token {
10+
string value = 1;
11+
int32 startLine = 2;
12+
int32 startLineOffset = 3;
13+
int32 endLine = 4;
14+
int32 endLineOffset = 5;
15+
}
16+
17+
message FileCpdTokens {
18+
repeated Token tokens = 1;
19+
}

sonar-python-plugin/src/main/java/org/sonar/plugins/python/cpd/PythonCpdAnalyzer.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ public boolean pushCachedCpdTokens(InputFile inputFile, CacheContext cacheContex
9090
cpdTokens.save();
9191
cacheContext.getWriteCache().copyFromPrevious(key);
9292
return true;
93-
} catch (IOException | ClassNotFoundException | ClassCastException e) {
93+
} catch (IOException e) {
9494
LOG.warn("Failed to deserialize CPD tokens ({}: {})", e.getClass().getSimpleName(), e.getMessage());
9595
}
9696

sonar-python-plugin/src/test/java/org/sonar/plugins/python/PythonSensorTest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1033,7 +1033,7 @@ public void read_cpd_tokens_from_cache_corrupted_format() throws IOException {
10331033
sensor().execute(context);
10341034

10351035
assertThat(logTester.logs(LoggerLevel.WARN))
1036-
.anyMatch(line -> line.startsWith("Failed to deserialize CPD tokens (ClassCastException: class java.lang.String cannot be cast to class java.util.List"));
1036+
.anyMatch(line -> line.startsWith("Failed to deserialize CPD tokens"));
10371037

10381038
// Verify the written CPD tokens
10391039
List<TokensLine> tokensLines = context.cpdTokens("moduleKey:pass.py");

0 commit comments

Comments
 (0)