Skip to content

Commit 8d1aab5

Browse files
authored
Add scope filtering for symbol extraction (#8676)
Added specific filters for the Avro, Protobuf and Wire libraries. The classes generated by these libraries are not useful to extract symbols for.
1 parent bed9326 commit 8d1aab5

File tree

12 files changed

+329
-11
lines changed

12 files changed

+329
-11
lines changed

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/DebuggerAgent.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,12 @@
1111
import com.datadog.debugger.sink.ProbeStatusSink;
1212
import com.datadog.debugger.sink.SnapshotSink;
1313
import com.datadog.debugger.sink.SymbolSink;
14+
import com.datadog.debugger.symbol.AvroFilter;
15+
import com.datadog.debugger.symbol.ProtoFilter;
16+
import com.datadog.debugger.symbol.ScopeFilter;
1417
import com.datadog.debugger.symbol.SymDBEnablement;
1518
import com.datadog.debugger.symbol.SymbolAggregator;
19+
import com.datadog.debugger.symbol.WireFilter;
1620
import com.datadog.debugger.uploader.BatchUploader;
1721
import com.datadog.debugger.util.ClassNameFiltering;
1822
import com.datadog.debugger.util.DebuggerMetrics;
@@ -41,7 +45,9 @@
4145
import java.nio.file.Path;
4246
import java.nio.file.Paths;
4347
import java.time.Duration;
48+
import java.util.Arrays;
4449
import java.util.Collections;
50+
import java.util.List;
4551
import java.util.concurrent.atomic.AtomicBoolean;
4652
import java.util.stream.Collectors;
4753
import java.util.zip.ZipOutputStream;
@@ -155,9 +161,14 @@ public static void startDynamicInstrumentation() {
155161
if (configurationPoller != null) {
156162
if (config.isSymbolDatabaseEnabled()) {
157163
initClassNameFilter();
164+
List<ScopeFilter> scopeFilters =
165+
Arrays.asList(new AvroFilter(), new ProtoFilter(), new WireFilter());
158166
SymbolAggregator symbolAggregator =
159167
new SymbolAggregator(
160-
classNameFilter, sink.getSymbolSink(), config.getSymbolDatabaseFlushThreshold());
168+
classNameFilter,
169+
scopeFilters,
170+
sink.getSymbolSink(),
171+
config.getSymbolDatabaseFlushThreshold());
161172
symbolAggregator.start();
162173
symDBEnablement =
163174
new SymDBEnablement(instrumentation, config, symbolAggregator, classNameFilter);
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.datadog.debugger.symbol;
2+
3+
public class AvroFilter implements ScopeFilter {
4+
@Override
5+
public boolean filterOut(Scope scope) {
6+
if (scope == null) {
7+
return false;
8+
}
9+
LanguageSpecifics languageSpecifics = scope.getLanguageSpecifics();
10+
if (languageSpecifics != null) {
11+
String superClass = languageSpecifics.getSuperClass();
12+
// Allow Avro data classes that extend SpecificRecordBase.
13+
if ("org.apache.avro.specific.SpecificRecordBase".equals(superClass)) {
14+
return false;
15+
}
16+
}
17+
// Filter out classes that appear to be just schema wrappers.
18+
if (scope.getScopeType() == ScopeType.CLASS
19+
&& scope.getSymbols() != null
20+
&& scope.getSymbols().stream()
21+
.anyMatch(
22+
it ->
23+
it.getSymbolType() == SymbolType.STATIC_FIELD
24+
&& "SCHEMA$".equals(it.getName())
25+
&& it.getType() != null
26+
&& it.getType().contains("org.apache.avro.Schema"))) {
27+
return true;
28+
}
29+
// Otherwise, do not filter.
30+
return false;
31+
}
32+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package com.datadog.debugger.symbol;
2+
3+
import java.util.List;
4+
5+
public class ProtoFilter implements ScopeFilter {
6+
@Override
7+
public boolean filterOut(Scope scope) {
8+
if (scope == null) {
9+
return false;
10+
}
11+
LanguageSpecifics languageSpecifics = scope.getLanguageSpecifics();
12+
if (languageSpecifics != null) {
13+
List<String> interfaces = languageSpecifics.getInterfaces();
14+
if (interfaces != null) {
15+
if (interfaces.contains("com.google.protobuf.MessageOrBuilder")) {
16+
// MessageOrBuilder is an interface implemented by both message classes and their
17+
// builders.
18+
// Scopes implementing this interface are filtered out because they do not represent
19+
// concrete data structures but rather interfaces for accessing or building messages.
20+
return true;
21+
}
22+
}
23+
String superClass = languageSpecifics.getSuperClass();
24+
if ("com.google.protobuf.AbstractParser".equals(superClass)) {
25+
// AbstractParser is a base class for parsing protobuf messages. Scopes with this super
26+
// class are filtered out because they are utility classes for parsing and do not contain
27+
// actual data fields.
28+
return true;
29+
}
30+
if ("com.google.protobuf.GeneratedMessageV3$Builder".equals(superClass)) {
31+
// GeneratedMessageV3$Builder is a builder class for constructing GeneratedMessageV3
32+
// instances. These scopes are filtered out because they are used for building messages and
33+
// do not represent the final data structure.
34+
return true;
35+
}
36+
}
37+
// If none of the above matched, see if the class has a proto descriptor field. This is the case
38+
// for wrapper
39+
// classes (`OuterClass`) and `Enum` classes. They contain metadata, not data.
40+
if (hasProtoDescriptorField(scope)) {
41+
return true;
42+
}
43+
// Probably no protobuf, pass
44+
return false;
45+
}
46+
47+
private boolean hasProtoDescriptorField(Scope scope) {
48+
return scope.getScopeType() == ScopeType.CLASS
49+
&& scope.getSymbols() != null
50+
&& scope.getSymbols().stream()
51+
.anyMatch(
52+
it ->
53+
it.getSymbolType() == SymbolType.STATIC_FIELD
54+
&& it.getType() != null
55+
&& it.getType().contains("com.google.protobuf.Descriptors"));
56+
}
57+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package com.datadog.debugger.symbol;
2+
3+
public interface ScopeFilter {
4+
/** returns true if the scope should be excluded */
5+
boolean filterOut(Scope scope);
6+
}

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/symbol/SymbolAggregator.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public class SymbolAggregator {
3838
private static final int CLASSFILE_BUFFER_SIZE = 8192;
3939

4040
private final DebuggerContext.ClassNameFilter classNameFilter;
41+
private final List<ScopeFilter> scopeFilters;
4142
private final SymbolSink sink;
4243
private final int symbolFlushThreshold;
4344
private final Map<String, Scope> jarScopesByName = new HashMap<>();
@@ -51,8 +52,12 @@ public class SymbolAggregator {
5152
private final Set<String> alreadyScannedJars = ConcurrentHashMap.newKeySet();
5253

5354
public SymbolAggregator(
54-
DebuggerContext.ClassNameFilter classNameFilter, SymbolSink sink, int symbolFlushThreshold) {
55+
DebuggerContext.ClassNameFilter classNameFilter,
56+
List<ScopeFilter> scopeFilters,
57+
SymbolSink sink,
58+
int symbolFlushThreshold) {
5559
this.classNameFilter = classNameFilter;
60+
this.scopeFilters = scopeFilters;
5661
this.sink = sink;
5762
this.symbolFlushThreshold = symbolFlushThreshold;
5863
}
@@ -119,10 +124,18 @@ public void parseClass(
119124
}
120125
LOGGER.debug("Extracting Symbols from: {}, located in: {}", className, jarName);
121126
Scope jarScope = SymbolExtractor.extract(classfileBuffer, jarName);
127+
jarScope = applyFilters(jarScope);
122128
addJarScope(jarScope, false);
123129
symDBReport.incClassCount(jarName);
124130
}
125131

132+
private Scope applyFilters(Scope jarScope) {
133+
for (ScopeFilter filter : scopeFilters) {
134+
jarScope.getScopes().removeIf(filter::filterOut);
135+
}
136+
return jarScope;
137+
}
138+
126139
private void flushRemainingScopes(SymbolAggregator symbolAggregator) {
127140
synchronized (jarScopeLock) {
128141
if (jarScopesByName.isEmpty()) {
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package com.datadog.debugger.symbol;
2+
3+
import java.util.List;
4+
5+
public class WireFilter implements ScopeFilter {
6+
@Override
7+
public boolean filterOut(Scope scope) {
8+
// Filter out classes generated by Square Wire: https://square.github.io/wire/
9+
if (scope == null) {
10+
return false;
11+
}
12+
LanguageSpecifics languageSpecifics = scope.getLanguageSpecifics();
13+
if (languageSpecifics == null) {
14+
return false;
15+
}
16+
List<String> interfaces = languageSpecifics.getInterfaces();
17+
if (interfaces != null) {
18+
if (interfaces.contains("com.squareup.wire.Message")) {
19+
// Pass-through for Message since it contains data
20+
return false;
21+
}
22+
if (interfaces.stream().anyMatch(it -> it.startsWith("com.squareup.wire"))) {
23+
return true;
24+
}
25+
}
26+
String superClass = languageSpecifics.getSuperClass();
27+
if (superClass != null) {
28+
if (superClass.startsWith("com.squareup.wire")) {
29+
return true;
30+
}
31+
}
32+
// Probably no protobuf, pass
33+
return false;
34+
}
35+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package com.datadog.debugger.symbol;
2+
3+
import static java.util.Arrays.asList;
4+
import static org.junit.jupiter.api.Assertions.*;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
class AvroFilterTest {
9+
@Test
10+
void filterOut() {
11+
AvroFilter avroFilter = new AvroFilter();
12+
assertFalse(avroFilter.filterOut(null));
13+
Scope scope = Scope.builder(ScopeType.CLASS, "", 0, 0).build();
14+
assertFalse(avroFilter.filterOut(scope));
15+
scope = Scope.builder(ScopeType.CLASS, "", 0, 0).name("org.apache.avro.MyClass").build();
16+
assertFalse(avroFilter.filterOut(scope));
17+
scope =
18+
Scope.builder(ScopeType.CLASS, "", 0, 0)
19+
.languageSpecifics(
20+
new LanguageSpecifics.Builder()
21+
.superClass("org.apache.avro.specific.SpecificRecordBase")
22+
.build())
23+
.build();
24+
assertFalse(avroFilter.filterOut(scope));
25+
scope =
26+
Scope.builder(ScopeType.CLASS, "", 0, 0)
27+
.symbols(
28+
asList(
29+
new Symbol(
30+
SymbolType.STATIC_FIELD, "SCHEMA$", 0, "org.apache.avro.Schema", null)))
31+
.build();
32+
assertTrue(avroFilter.filterOut(scope));
33+
}
34+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package com.datadog.debugger.symbol;
2+
3+
import static java.util.Arrays.asList;
4+
import static org.junit.jupiter.api.Assertions.*;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
class ProtoFilterTest {
9+
@Test
10+
void filterOut() {
11+
ProtoFilter protoFilter = new ProtoFilter();
12+
assertFalse(protoFilter.filterOut(null));
13+
Scope scope = Scope.builder(ScopeType.CLASS, "", 0, 0).build();
14+
assertFalse(protoFilter.filterOut(scope));
15+
scope = Scope.builder(ScopeType.CLASS, "", 0, 0).name("com.google.protobuf.MyClass").build();
16+
assertFalse(protoFilter.filterOut(scope));
17+
scope =
18+
Scope.builder(ScopeType.CLASS, "", 0, 0)
19+
.languageSpecifics(
20+
new LanguageSpecifics.Builder()
21+
.addInterfaces(asList("com.google.protobuf.MessageOrBuilder"))
22+
.build())
23+
.build();
24+
assertTrue(protoFilter.filterOut(scope));
25+
scope =
26+
Scope.builder(ScopeType.CLASS, "", 0, 0)
27+
.languageSpecifics(
28+
new LanguageSpecifics.Builder()
29+
.superClass("com.google.protobuf.AbstractParser")
30+
.build())
31+
.build();
32+
assertTrue(protoFilter.filterOut(scope));
33+
scope =
34+
Scope.builder(ScopeType.CLASS, "", 0, 0)
35+
.languageSpecifics(
36+
new LanguageSpecifics.Builder()
37+
.superClass("com.google.protobuf.GeneratedMessageV3$Builder")
38+
.build())
39+
.build();
40+
assertTrue(protoFilter.filterOut(scope));
41+
scope =
42+
Scope.builder(ScopeType.CLASS, "", 0, 0)
43+
.symbols(
44+
asList(
45+
new Symbol(
46+
SymbolType.STATIC_FIELD,
47+
"SCHEMA$",
48+
0,
49+
"com.google.protobuf.Descriptors",
50+
null)))
51+
.build();
52+
assertTrue(protoFilter.filterOut(scope));
53+
}
54+
}

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/symbol/SymDBEnablementTest.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.datadog.debugger.symbol;
22

3+
import static java.util.Collections.emptyList;
34
import static org.junit.jupiter.api.Assertions.assertEquals;
45
import static org.junit.jupiter.api.Assertions.assertFalse;
56
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -63,7 +64,7 @@ public void enableDisableSymDBThroughRC() throws Exception {
6364
new SymDBEnablement(
6465
instr,
6566
config,
66-
new SymbolAggregator(classNameFiltering, symbolSink, 1),
67+
new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1),
6768
classNameFiltering);
6869
symDBEnablement.accept(ParsedConfigKey.parse(CONFIG_KEY), UPlOAD_SYMBOL_TRUE, null);
6970
waitForUpload(symDBEnablement);
@@ -79,7 +80,7 @@ public void removeSymDBConfig() throws Exception {
7980
new SymDBEnablement(
8081
instr,
8182
config,
82-
new SymbolAggregator(classNameFiltering, symbolSink, 1),
83+
new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1),
8384
classNameFiltering);
8485
symDBEnablement.accept(ParsedConfigKey.parse(CONFIG_KEY), UPlOAD_SYMBOL_TRUE, null);
8586
waitForUpload(symDBEnablement);
@@ -96,7 +97,7 @@ public void noIncludesFilterOutDatadogClass() {
9697
new SymDBEnablement(
9798
instr,
9899
config,
99-
new SymbolAggregator(classNameFiltering, symbolSink, 1),
100+
new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1),
100101
classNameFiltering);
101102
symDBEnablement.startSymbolExtraction();
102103
ArgumentCaptor<SymbolExtractionTransformer> captor =
@@ -122,7 +123,7 @@ public void parseLoadedClass() throws ClassNotFoundException, IOException {
122123
.collect(Collectors.toSet()));
123124
ClassNameFiltering classNameFiltering = ClassNameFiltering.allowAll();
124125
SymbolAggregator symbolAggregator =
125-
spy(new SymbolAggregator(classNameFiltering, symbolSink, 1));
126+
spy(new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1));
126127
SymDBEnablement symDBEnablement =
127128
new SymDBEnablement(instr, config, symbolAggregator, classNameFiltering);
128129
symDBEnablement.startSymbolExtraction();
@@ -150,7 +151,7 @@ public void parseLoadedClassFromDirectory()
150151
.collect(Collectors.toSet()));
151152
ClassNameFiltering classNameFiltering = ClassNameFiltering.allowAll();
152153
SymbolAggregator symbolAggregator =
153-
spy(new SymbolAggregator(classNameFiltering, symbolSink, 1));
154+
spy(new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1));
154155
SymDBEnablement symDBEnablement =
155156
new SymDBEnablement(instr, config, symbolAggregator, classNameFiltering);
156157
symDBEnablement.startSymbolExtraction();
@@ -171,7 +172,8 @@ public void noDuplicateSymbolExtraction() {
171172
Collections.singleton("org.springframework."),
172173
Collections.singleton("com.datadog.debugger."),
173174
Collections.emptySet());
174-
SymbolAggregator symbolAggregator = new SymbolAggregator(classNameFiltering, mockSymbolSink, 1);
175+
SymbolAggregator symbolAggregator =
176+
new SymbolAggregator(classNameFiltering, emptyList(), mockSymbolSink, 1);
175177
SymDBEnablement symDBEnablement =
176178
new SymDBEnablement(instr, config, symbolAggregator, classNameFiltering);
177179
doAnswer(

0 commit comments

Comments
 (0)