Skip to content

Commit 312d1c3

Browse files
Merge pull request #231 from AikidoSec/fix-slow-speeds-due-to-recursion-in-data-schema-extractor
Fix: Use a set of scanned objects to avoid recursion in getDataSchema
2 parents f3b4139 + ac0df0f commit 312d1c3

File tree

3 files changed

+97
-28
lines changed

3 files changed

+97
-28
lines changed

agent_api/src/main/java/dev/aikido/agent_api/api_discovery/DataSchemaGenerator.java

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,21 @@ public class DataSchemaGenerator {
1515
private static final int MAX_PROPS = 100;
1616

1717
public static DataSchemaItem getDataSchema(Object data) {
18-
return new DataSchemaGenerator().getDataSchema(data, 0);
18+
// We use an IdentityHashMap to skip the hashCode() and equals() checks
19+
// these checks can take longer than a simple identity check, and in rare cases
20+
// be themselves recursive.
21+
Set<Object> scanned = Collections.newSetFromMap(new IdentityHashMap<>());
22+
return new DataSchemaGenerator().getDataSchema(data, 0, scanned);
1923
}
2024

21-
private DataSchemaItem getDataSchema(Object data, int depth) {
22-
if (data == null) {
25+
private DataSchemaItem getDataSchema(Object data, int depth, Set<Object> scanned) {
26+
if (depth > MAX_TRAVERSAL_DEPTH) {
27+
// avoid expensive recursion loops
28+
return new DataSchemaItem(DataSchemaType.EMPTY);
29+
}
30+
depth += 1;
31+
32+
if (data == null || scanned.contains(data)) {
2333
// Handle null as a special case
2434
return new DataSchemaItem(DataSchemaType.EMPTY);
2535
}
@@ -29,15 +39,18 @@ private DataSchemaItem getDataSchema(Object data, int depth) {
2939
return new DataSchemaItem(primitiveToType(data));
3040
}
3141

42+
// Don't add primitive types to the scanned list (which avoids slow & heavy recursions)
43+
scanned.add(data);
44+
3245
// Collection is a catch-all for lists, sets, ...
3346
if (data instanceof Collection<?> dataList) {
34-
return getDataSchema(dataList.toArray(), depth);
47+
return getDataSchema(dataList.toArray(), depth, scanned);
3548
}
3649
// Arrays are still another thing :
3750
if (data.getClass().isArray()) {
3851
DataSchemaItem items = null;
3952
for (int i = 0; i < Math.min(MAX_ARRAY_DEPTH, Array.getLength(data)); i++) {
40-
DataSchemaItem childDataSchemaItem = getDataSchema(Array.get(data, i), depth);
53+
DataSchemaItem childDataSchemaItem = getDataSchema(Array.get(data, i), depth, scanned);
4154
if (items == null) {
4255
items = childDataSchemaItem;
4356
} else {
@@ -53,32 +66,30 @@ private DataSchemaItem getDataSchema(Object data, int depth) {
5366

5467
// If the depth is less than the maximum depth, get the schema for each property
5568
Map<String, DataSchemaItem> props = new HashMap<>();
56-
if (depth <= MAX_TRAVERSAL_DEPTH) {
57-
if (data instanceof Map<?, ?> map) {
58-
for (Object key : map.keySet()) {
69+
if (data instanceof Map<?, ?> map) {
70+
for (Object key : map.keySet()) {
71+
if (props.size() >= MAX_PROPS) {
72+
// We cannot allow more properties than MAX_PROPS, breaking for loop.
73+
break;
74+
}
75+
props.put((String) key, getDataSchema(map.get(key), depth, scanned));
76+
}
77+
} else if (data.getClass().toString().startsWith("class org.codehaus.groovy")) {
78+
// pass through, we do not want to check org.codehaus.groovy
79+
} else {
80+
Field[] fields = data.getClass().getDeclaredFields();
81+
for (Field field : fields) {
82+
try {
83+
if (Modifier.isTransient(field.getModifiers())) {
84+
continue; // Do not scan transient fields.
85+
}
86+
field.setAccessible(true); // Allow access to private fields
5987
if (props.size() >= MAX_PROPS) {
6088
// We cannot allow more properties than MAX_PROPS, breaking for loop.
6189
break;
6290
}
63-
props.put((String) key, getDataSchema(map.get(key), depth + 1));
64-
}
65-
} else if (data.getClass().toString().startsWith("class org.codehaus.groovy")) {
66-
// pass through, we do not want to check org.codehaus.groovy
67-
} else {
68-
Field[] fields = data.getClass().getDeclaredFields();
69-
for (Field field : fields) {
70-
try {
71-
if (Modifier.isTransient(field.getModifiers())) {
72-
continue; // Do not scan transient fields.
73-
}
74-
field.setAccessible(true); // Allow access to private fields
75-
if (props.size() >= MAX_PROPS) {
76-
// We cannot allow more properties than MAX_PROPS, breaking for loop.
77-
break;
78-
}
79-
props.put(field.getName(), getDataSchema(field.get(data), depth + 1));
80-
} catch (IllegalAccessException | RuntimeException ignored) {
81-
}
91+
props.put(field.getName(), getDataSchema(field.get(data), depth, scanned));
92+
} catch (IllegalAccessException | RuntimeException ignored) {
8293
}
8394
}
8495
}

agent_api/src/main/java/dev/aikido/agent_api/helpers/patterns/PrimitiveType.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ public static boolean isPrimitiveOrString(Object source) {
3232
if (WRAPPER_TYPE_MAP.containsKey(source.getClass())) {
3333
return true;
3434
}
35+
if (source instanceof Number) {
36+
// Add special case for numbers, since it's hard to put all different number types in this map.
37+
return true;
38+
}
3539
return source instanceof String;
3640
}
3741
}

agent_api/src/test/java/api_discovery/DataSchemaGeneratorTest.java

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import dev.aikido.agent_api.api_discovery.DataSchemaItem;
66
import dev.aikido.agent_api.api_discovery.DataSchemaType;
77
import org.junit.jupiter.api.Test;
8+
9+
import java.math.BigDecimal;
810
import java.util.*;
911
import static org.junit.jupiter.api.Assertions.*;
1012

@@ -45,6 +47,58 @@ public void testGetDataSchemaComplexObject() {
4547
assertEquals(DataSchemaType.NUMBER, schema.properties().get("arr").items().type());
4648
}
4749

50+
@Test
51+
public void testGetDataSchemaWithBigDecimal() {
52+
Map<String, Object> input = new HashMap<>();
53+
input.put("test", new BigDecimal("2.2"));
54+
DataSchemaItem schema = DataSchemaGenerator.getDataSchema(input);
55+
assertEquals(DataSchemaType.OBJECT, schema.type());
56+
assertEquals(DataSchemaType.NUMBER, schema.properties().get("test").type());
57+
}
58+
59+
static class MyRecursiveTestClass {
60+
private String world;
61+
private MyRecursiveTestClass myClass1;
62+
private MyRecursiveTestClass myClass2;
63+
private MyRecursiveTestClass myClass3;
64+
private MyRecursiveTestClass myClass4;
65+
private MyRecursiveTestClass myClass5;
66+
private MyRecursiveTestClass myClass6;
67+
private MyRecursiveTestClass myClass7;
68+
private MyRecursiveTestClass myClass8;
69+
private MyRecursiveTestClass myClass9;
70+
private MyRecursiveTestClass myClass10;
71+
MyRecursiveTestClass(String world) {
72+
this.world = world;
73+
}
74+
75+
public void setClasses(MyRecursiveTestClass recursiveTestClass) {
76+
this.myClass1 = recursiveTestClass;
77+
this.myClass2 = recursiveTestClass;
78+
this.myClass3 = recursiveTestClass;
79+
this.myClass4 = recursiveTestClass;
80+
this.myClass5 = recursiveTestClass;
81+
this.myClass6 = recursiveTestClass;
82+
this.myClass7 = recursiveTestClass;
83+
this.myClass8 = recursiveTestClass;
84+
this.myClass9 = recursiveTestClass;
85+
this.myClass10 = recursiveTestClass;
86+
}
87+
}
88+
89+
@Test
90+
public void testKeepsTrackOfScannedObjects() {
91+
// Causes Java Heap Space bug if we are not keeping track of scanned objects.
92+
Map<String, Object> input = new HashMap<>();
93+
var testRecursionClass = new MyRecursiveTestClass("World1");
94+
testRecursionClass.setClasses(testRecursionClass);
95+
input.put("test", testRecursionClass);
96+
DataSchemaItem schema = DataSchemaGenerator.getDataSchema(input);
97+
assertEquals(DataSchemaType.OBJECT, schema.type());
98+
assertEquals(DataSchemaType.OBJECT, schema.properties().get("test").type());
99+
assertEquals(DataSchemaType.STRING, schema.properties().get("test").properties().get("world").type());
100+
}
101+
48102
@Test
49103
public void testGetDataSchemaNestedObject() {
50104
Map<String, Object> input = new HashMap<>();
@@ -165,7 +219,7 @@ public void testMaxProperties() {
165219
}
166220

167221
private Map<String, Object> generateTestObjectWithDepth(int depth) {
168-
if (depth == 0) {
222+
if (depth == 1) {
169223
return Collections.singletonMap("value", "testValue");
170224
}
171225
return Collections.singletonMap("prop", generateTestObjectWithDepth(depth - 1));

0 commit comments

Comments
 (0)