Skip to content

Commit ed6351d

Browse files
authored
Avoid duplicate class symbol extraction (#7919)
Same classes can be loaded in different classloader, but symbol-wise they are the same. In some situation (like Groovy script) we can a lot of class loaded for same symbols which pollute SymDB backend. We need to track all classes loaded and extracted into ClassNameTrie that is space efficient prefix tree. That way we prevent any duplicates reaching SymDB backend
1 parent 0ab8864 commit ed6351d

File tree

3 files changed

+26
-20
lines changed

3 files changed

+26
-20
lines changed

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/symbol/SymDBEnablement.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ public void startSymbolExtraction() {
115115
Instant.ofEpochMilli(lastUploadTimestamp), ZoneId.systemDefault()));
116116
return;
117117
}
118-
symbolAggregator.loadedClassesProcessStarted();
119118
try {
120119
symbolExtractionTransformer =
121120
new SymbolExtractionTransformer(symbolAggregator, classNameFilter);
@@ -125,8 +124,6 @@ public void startSymbolExtraction() {
125124
} catch (Throwable ex) {
126125
// catch all Throwables because LinkageError is possible (duplicate class definition)
127126
LOGGER.debug("Error during symbol extraction: ", ex);
128-
} finally {
129-
symbolAggregator.loadedClassesProcessEnded();
130127
}
131128
} finally {
132129
starting.set(false);

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/symbol/SymbolAggregator.java

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import com.datadog.debugger.sink.SymbolSink;
66
import datadog.trace.util.AgentTaskScheduler;
7+
import datadog.trace.util.ClassNameTrie;
8+
import datadog.trace.util.Strings;
79
import java.nio.file.Files;
810
import java.nio.file.Path;
911
import java.security.ProtectionDomain;
@@ -12,8 +14,6 @@
1214
import java.util.HashMap;
1315
import java.util.List;
1416
import java.util.Map;
15-
import java.util.Set;
16-
import java.util.concurrent.ConcurrentHashMap;
1717
import java.util.concurrent.TimeUnit;
1818
import org.slf4j.Logger;
1919
import org.slf4j.LoggerFactory;
@@ -28,7 +28,8 @@ public class SymbolAggregator {
2828
private final AgentTaskScheduler.Scheduled<SymbolAggregator> scheduled;
2929
private final Object jarScopeLock = new Object();
3030
private int totalClasses;
31-
private volatile Set<String> loadedClasses;
31+
// ClassNameTrie is not thread safe, All accesses must be protected by a lock
32+
private final ClassNameTrie.Builder loadedClasses = new ClassNameTrie.Builder();
3233

3334
public SymbolAggregator(SymbolSink sink, int symbolFlushThreshold) {
3435
this.sink = sink;
@@ -61,10 +62,13 @@ public void parseClass(String className, byte[] classfileBuffer, String jarName)
6162
if (className.endsWith(CLASS_SUFFIX)) {
6263
className = className.substring(0, className.length() - CLASS_SUFFIX.length());
6364
}
64-
Set<String> localLoadedClasses = loadedClasses;
65-
if (localLoadedClasses != null && !localLoadedClasses.add(className)) {
66-
// class already loaded and symbol extracted
67-
return;
65+
synchronized (loadedClasses) {
66+
String fqn = Strings.getClassName(className); // ClassNameTrie expects Java class names ('.')
67+
if (loadedClasses.apply(fqn) > 0) {
68+
// class already loaded and symbol extracted
69+
return;
70+
}
71+
loadedClasses.put(fqn, 1);
6872
}
6973
LOGGER.debug("Extracting Symbols from: {}, located in: {}", className, jarName);
7074
Scope jarScope = SymbolExtractor.extract(classfileBuffer, jarName);
@@ -110,14 +114,4 @@ private void addJarScope(Scope jarScope, boolean forceFlush) {
110114
}
111115
}
112116
}
113-
114-
void loadedClassesProcessStarted() {
115-
// to avoid duplicate symbol extraction we keep track of loaded classes
116-
// during the loaded class extraction phase
117-
loadedClasses = ConcurrentHashMap.newKeySet();
118-
}
119-
120-
void loadedClassesProcessEnded() {
121-
loadedClasses = null;
122-
}
123117
}

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/symbol/SymbolExtractionTransformerTest.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class SymbolExtractionTransformerTest {
4343
"javax.",
4444
"javaslang.",
4545
"org.omg.",
46+
"org.joor.",
4647
"com.datadog.debugger.")
4748
.collect(Collectors.toSet());
4849

@@ -896,6 +897,20 @@ public void filterOutClassesFromExcludedPackages() throws IOException, URISyntax
896897
.anyMatch(scope -> scope.getName().equals(CLASS_NAME)));
897898
}
898899

900+
@Test
901+
public void duplicateClassThroughDifferentClassLoader() throws IOException, URISyntaxException {
902+
final String CLASS_NAME = SYMBOL_PACKAGE + "SymbolExtraction01";
903+
SymbolSinkMock symbolSinkMock = new SymbolSinkMock(config);
904+
SymbolExtractionTransformer transformer = createTransformer(symbolSinkMock);
905+
instr.addTransformer(transformer);
906+
for (int i = 0; i < 10; i++) {
907+
// compile and load the class in a specific ClassLoader each time
908+
Class<?> testClass = compileAndLoadClass(CLASS_NAME);
909+
Reflect.on(testClass).call("main", "1").get();
910+
}
911+
assertEquals(1, symbolSinkMock.jarScopes.size());
912+
}
913+
899914
private void assertLangSpecifics(
900915
LanguageSpecifics languageSpecifics,
901916
List<String> expectedModifiers,

0 commit comments

Comments
 (0)