Skip to content

Commit 1bed582

Browse files
committed
feat(model-datastructure): large lists of child nodes are now split into smaller objects
One of the sandboxes in MPS-extensions contains a node with 100_000 child nodes. Large objects are bad for scalability. One issue is that a single key-value entry can be larger than the page size in Ignite; the cache then doesn't have enough free space prepared and throws an `IgniteOutOfMemoryException`. But large objects cause performance issues everywhere. After switching from int64 IDs to MPS IDs, the average object size increased and this problem became more severe.
1 parent 69dc452 commit 1bed582

File tree

16 files changed

+259
-58
lines changed

16 files changed

+259
-58
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
package org.modelix.datastructures.list
2+
3+
import kotlinx.serialization.KSerializer
4+
import kotlinx.serialization.builtins.ListSerializer
5+
import kotlinx.serialization.builtins.serializer
6+
import kotlinx.serialization.descriptors.SerialDescriptor
7+
import kotlinx.serialization.encoding.Decoder
8+
import kotlinx.serialization.encoding.Encoder
9+
import org.modelix.datastructures.objects.IDataTypeConfiguration
10+
import org.modelix.datastructures.objects.IObjectData
11+
import org.modelix.datastructures.objects.IObjectDeserializer
12+
import org.modelix.datastructures.objects.IObjectGraph
13+
import org.modelix.datastructures.objects.IObjectReferenceFactory
14+
import org.modelix.datastructures.objects.Object
15+
import org.modelix.datastructures.objects.ObjectHash
16+
import org.modelix.datastructures.objects.ObjectReference
17+
import org.modelix.datastructures.objects.getDescendantsAndSelf
18+
import org.modelix.datastructures.objects.getHashString
19+
import org.modelix.datastructures.serialization.SerializationSeparators
20+
import org.modelix.kotlin.utils.urlDecode
21+
import org.modelix.kotlin.utils.urlEncode
22+
import org.modelix.streams.IStream
23+
import kotlin.collections.chunked
24+
25+
/**
 * Shared configuration of a [LargeList] and, at the same time, the deserializer for its nodes.
 *
 * @param graph object graph used to turn newly created sub lists into references
 * @param elementType (de-)serialization of the list elements
 * @param maxNodeSize maximum number of elements stored directly in a leaf node. It is also the
 *        lower bound for the chunk size used by [createList], which limits an internal node to
 *        at most [maxNodeSize] sub lists. Must be at least 2.
 * @throws IllegalArgumentException if [maxNodeSize] is smaller than 2
 */
class LargeListConfig<E>(
    val graph: IObjectGraph,
    val elementType: IDataTypeConfiguration<E>,
    val maxNodeSize: Int = 20,
) : IObjectDeserializer<LargeList<E>> {

    init {
        // With maxNodeSize == 1 the chunk size computed in createList equals the list size, so the
        // single chunk would be passed to createList again and the recursion would never terminate.
        // Values <= 0 would fail with a division by zero or an invalid chunk size.
        require(maxNodeSize >= 2) { "maxNodeSize must be at least 2, but was $maxNodeSize" }
    }

    /**
     * Parses the string produced by [LargeList.serialize].
     *
     * Input starting with [LargeList.LARGE_LIST_PREFIX] is read as an internal node, the remainder
     * being a list of references to sub lists. Any other input is read as a leaf node with
     * URL-encoded elements.
     */
    override fun deserialize(input: String, referenceFactory: IObjectReferenceFactory): LargeList<E> {
        if (input.startsWith(LargeList.LARGE_LIST_PREFIX)) {
            val subLists = input.substring(LargeList.LARGE_LIST_PREFIX.length)
                .split(SerializationSeparators.LEVEL2)
                .map { referenceFactory(it, this) }
            return LargeListInternalNode(this, subLists)
        }

        // Empty segments are skipped, which makes an empty input deserialize to an empty leaf.
        val elements = input.split(SerializationSeparators.LEVEL2)
            .filter { it.isNotEmpty() }
            .map { elementType.deserialize(it.urlDecode()!!) }
        return LargeListLeafNode(this, elements)
    }

    /** Creates a leaf node without any elements. */
    fun createEmptyList(): LargeList<E> = LargeListLeafNode(this, emptyList())

    /**
     * Builds a list containing [elements] in the given order.
     *
     * Up to [maxNodeSize] elements are stored in a single leaf. Longer lists are split into at most
     * [maxNodeSize] chunks, each of which is built recursively and referenced from an internal node.
     */
    fun createList(elements: List<E>): LargeList<E> {
        if (elements.size <= maxNodeSize) return LargeListLeafNode(this, elements)

        // Split the elements into at most maxNodeSize sub lists, but also minimize the number of
        // objects by never using chunks smaller than maxNodeSize.
        val chunkSize = ((elements.size + maxNodeSize - 1) / maxNodeSize).coerceAtLeast(maxNodeSize)
        val subLists = elements.chunked(chunkSize) { chunk ->
            // The list passed to the transform is only valid inside the lambda, so copy it.
            graph.fromCreated(createList(chunk.toList()))
        }
        return LargeListInternalNode(this, subLists)
    }
}
59+
60+
/**
 * kotlinx.serialization adapter that encodes a [LargeList] node as a flat list of strings.
 *
 * Internal nodes are written as the hash strings of their sub lists, leaf nodes as their
 * serialized elements.
 */
class LargeListKSerializer<E>(val config: LargeListConfig<E>) : KSerializer<LargeList<E>> {
    private val listSerializer = ListSerializer(String.serializer())
    override val descriptor: SerialDescriptor = listSerializer.descriptor

    override fun serialize(encoder: Encoder, value: LargeList<E>) {
        val strings: List<String> = when (value) {
            is LargeListInternalNode<E> -> value.subLists.map { subList -> subList.getHashString() }
            is LargeListLeafNode<E> -> value.elements.map { element -> config.elementType.serialize(element) }
        }
        listSerializer.serialize(encoder, strings)
    }

    override fun deserialize(decoder: Decoder): LargeList<E> {
        val strings = listSerializer.deserialize(decoder)

        // The node kind is not stored explicitly; it is derived from whether the first entry
        // is a valid hash string.
        val looksLikeInternalNode = strings.isNotEmpty() && ObjectHash.isValidHashString(strings.first())
        if (!looksLikeInternalNode) {
            return LargeListLeafNode(config, strings.map { element -> config.elementType.deserialize(element) })
        }

        val subLists = strings.map { hash -> config.graph.fromHashString(hash, config) }
        return LargeListInternalNode(config, subLists)
    }
}
83+
84+
/**
 * A list that is stored either as a single leaf node or as an internal node referencing sub lists.
 */
sealed class LargeList<E> : IObjectData {

    /** Returns the elements of this list as a stream. */
    abstract fun getElements(): IStream.Many<E>

    /** Reports this object together with all its descendants; [oldObject] is not taken into account. */
    override fun objectDiff(self: Object<*>, oldObject: Object<*>?): IStream.Many<Object<*>> =
        self.getDescendantsAndSelf()

    companion object {
        /** Marks the serialized form of an internal node. */
        const val LARGE_LIST_PREFIX = "OL" + SerializationSeparators.LEVEL1
    }
}
95+
96+
/**
 * [LargeList] node that holds no elements itself, only references to sub lists.
 */
class LargeListInternalNode<E>(
    val config: LargeListConfig<E>,
    val subLists: List<ObjectReference<LargeList<E>>>,
) : LargeList<E>() {

    /** Writes [LARGE_LIST_PREFIX] followed by the hash strings of all sub lists. */
    override fun serialize(): String {
        val hashes = subLists.joinToString(SerializationSeparators.LEVEL2) { subList -> subList.getHashString() }
        return "$LARGE_LIST_PREFIX$hashes"
    }

    override fun getDeserializer(): IObjectDeserializer<LargeList<E>> = config

    override fun getContainmentReferences(): List<ObjectReference<IObjectData>> = subLists.toList()

    /** Resolves each sub list and concatenates the element streams of all of them. */
    override fun getElements(): IStream.Many<E> =
        IStream.many(subLists).flatMap { subListRef ->
            subListRef.resolveData().flatMap { subList -> subList.getElements() }
        }
}
113+
114+
/**
 * [LargeList] node that stores its elements directly.
 */
class LargeListLeafNode<E>(
    val config: LargeListConfig<E>,
    val elements: List<E>,
) : LargeList<E>() {

    /**
     * Joins the URL-encoded serialized elements; an empty list results in an empty string
     * because joining no elements produces "".
     */
    override fun serialize(): String =
        elements.joinToString(SerializationSeparators.LEVEL2) { element ->
            config.elementType.serialize(element).urlEncode()
        }

    override fun getDeserializer(): IObjectDeserializer<LargeList<E>> = config

    /** Collects the containment references reported by the element type for each element. */
    override fun getContainmentReferences(): List<ObjectReference<IObjectData>> =
        elements.flatMap { element -> config.elementType.getContainmentReferences(element) }

    override fun getElements(): IStream.Many<E> = IStream.many(elements)
}

model-datastructure/src/commonMain/kotlin/org/modelix/datastructures/model/GenericModelTree.kt

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ abstract class GenericModelTree<NodeId>(
9292
}
9393

9494
override fun getChildren(parentId: NodeId): IStream.Many<NodeId> {
95-
return resolveNode(parentId).flatMapIterable { it.children }.map { it }
95+
return resolveNode(parentId).flatMap { it.getChildIds() }.map { it }
9696
}
9797

9898
private fun getRoleOfChild(childId: NodeId): IStream.One<IChildLinkReference> {
@@ -194,8 +194,8 @@ abstract class GenericModelTree<NodeId>(
194194
}
195195
}
196196

197-
val newChildren = IStream.many(newNode.children).flatMap { getNode(it) }.toList()
198-
val oldChildren = IStream.many(oldNode.children).flatMap { oldTree.getNode(it) }.toList()
197+
val newChildren = newNode.getChildIds().flatMap { getNode(it) }.toList()
198+
val oldChildren = oldNode.getChildIds().flatMap { oldTree.getNode(it) }.toList()
199199
val childrenChanges: IStream.Many<ChildrenChangedEvent<NodeId>> = newChildren.zipWith(oldChildren) { newChildrenList, oldChildrenList ->
200200
val oldChildren: MutableMap<String?, MutableList<NodeObjectData<NodeId>>> = HashMap()
201201
val newChildren: MutableMap<String?, MutableList<NodeObjectData<NodeId>>> = HashMap()
@@ -296,7 +296,7 @@ abstract class GenericModelTree<NodeId>(
296296
): IStream.One<IPersistentMap<NodeId, NodeObjectData<NodeId>>> {
297297
val newNodes = newIds.zip(concepts).map { (childId, concept) ->
298298
childId to NodeObjectData<NodeId>(
299-
deserializer = NodeObjectData.Deserializer(this.nodesMap.getKeyTypeConfig(), getId()),
299+
deserializer = NodeObjectData.Deserializer(graph, this.nodesMap.getKeyTypeConfig(), getId()),
300300
id = childId,
301301
concept = concept.takeIf { it != NullConcept.getReference() },
302302
containment = parentId to role,
@@ -320,19 +320,21 @@ abstract class GenericModelTree<NodeId>(
320320

321321
private fun insertChildrenIntoParentData(parentData: NodeObjectData<NodeId>, index: Int, newIds: Iterable<NodeId>, role: IChildLinkReference): IStream.One<NodeObjectData<NodeId>> {
322322
return if (index == -1) {
323-
IStream.Companion.of(parentData.children + newIds)
323+
parentData.getChildIds() + IStream.many(newIds)
324324
} else {
325-
this.getChildren(parentData.id, role).toList().map { childrenInRole ->
325+
this.getChildren(parentData.id, role).toList().flatMap { childrenInRole ->
326326
if (index > childrenInRole.size) throw RuntimeException("Invalid index $index. There are only ${childrenInRole.size} nodes in ${parentData.id}.$role")
327327
if (index == childrenInRole.size) {
328-
parentData.children + newIds
328+
parentData.getChildIds() + newIds
329329
} else {
330-
val indexInAll = parentData.children.indexOf(childrenInRole[index])
331-
parentData.children.take(indexInAll) + newIds + parentData.children.drop(indexInAll)
330+
parentData.getChildIds().toList().flatMapIterable { children ->
331+
val indexInAll = children.indexOf(childrenInRole[index])
332+
children.take(indexInAll) + newIds + children.drop(indexInAll)
333+
}
332334
}
333335
}
334-
}.map { newChildrenArray ->
335-
parentData.copy(children = newChildrenArray)
336+
}.toList().map { newChildrenArray ->
337+
parentData.withChildren(newChildrenArray)
336338
}
337339
}
338340

@@ -383,7 +385,7 @@ abstract class GenericModelTree<NodeId>(
383385

384386
val newTree: IStream.One<GenericModelTree<NodeId>> = oldParent.zipWith(adjustedIndex) { oldParentId, adjustedIndex ->
385387
val withChildRemoved = updateNode(oldParentId) {
386-
IStream.of(it.withChildRemoved(childId))
388+
it.withChildRemoved(childId)
387389
}.wrap()
388390
val withChildAdded = withChildRemoved.flatMapOne { tree ->
389391
tree.updateNode(newParentId) {
@@ -414,7 +416,7 @@ abstract class GenericModelTree<NodeId>(
414416
val parent: IStream.One<NodeId> = getParent(nodeId).exceptionIfEmpty { IllegalArgumentException("Cannot delete node without parent: $nodeId") }
415417

416418
return parent.zipWith(mapWithoutRemovedNodes) { parentId, mapWithoutRemovedNodes ->
417-
updateNodeInMap(mapWithoutRemovedNodes, parentId) { IStream.of(it.withChildRemoved(nodeId)) }
419+
updateNodeInMap(mapWithoutRemovedNodes, parentId) { it.withChildRemoved(nodeId) }
418420
}.flatten().wrap()
419421
}
420422
}

model-datastructure/src/commonMain/kotlin/org/modelix/datastructures/model/ModelTreeBuilder.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ abstract class ModelTreeBuilder<NodeId> private constructor(protected val common
2828
override fun build(): IGenericModelTree<Long> {
2929
val nodeIdType = LongDataTypeConfiguration()
3030
val root = NodeObjectData<Long>(
31-
deserializer = NodeObjectData.Deserializer(nodeIdType, common.treeId),
31+
deserializer = NodeObjectData.Deserializer(common.graph, nodeIdType, common.treeId),
3232
id = ITree.ROOT_ID,
3333
concept = null,
3434
containment = null,
@@ -37,7 +37,7 @@ abstract class ModelTreeBuilder<NodeId> private constructor(protected val common
3737
val config = HamtNode.Config(
3838
graph = common.graph,
3939
keyConfig = nodeIdType,
40-
valueConfig = ObjectReferenceDataTypeConfiguration(common.graph, NodeObjectData.Deserializer(nodeIdType, common.treeId)),
40+
valueConfig = ObjectReferenceDataTypeConfiguration(common.graph, NodeObjectData.Deserializer(common.graph, nodeIdType, common.treeId)),
4141
)
4242
return HamtInternalNode.createEmpty(config)
4343
.put(root.data.id, root.ref, common.graph)
@@ -52,15 +52,15 @@ abstract class ModelTreeBuilder<NodeId> private constructor(protected val common
5252
override fun build(): IGenericModelTree<INodeReference> {
5353
val nodeIdType = NodeReferenceDataTypeConfig()
5454
val root = NodeObjectData<INodeReference>(
55-
deserializer = NodeObjectData.Deserializer(nodeIdType, common.treeId),
55+
deserializer = NodeObjectData.Deserializer(common.graph, nodeIdType, common.treeId),
5656
id = PNodeReference(ITree.ROOT_ID, common.treeId.id),
5757
concept = null,
5858
containment = null,
5959
).asObject(common.graph)
6060
val config = PatriciaTrieConfig(
6161
graph = common.graph,
6262
keyConfig = nodeIdType,
63-
valueConfig = ObjectReferenceDataTypeConfiguration(common.graph, NodeObjectData.Deserializer(nodeIdType, common.treeId)),
63+
valueConfig = ObjectReferenceDataTypeConfiguration(common.graph, NodeObjectData.Deserializer(common.graph, nodeIdType, common.treeId)),
6464
)
6565
return PatriciaTrie(config).put(root.data.id, root.ref).getBlocking(common.graph).autoResolveValues().asModelTree(common.treeId)
6666
}

model-datastructure/src/commonMain/kotlin/org/modelix/datastructures/model/NodeObjectData.kt

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,20 @@ package org.modelix.datastructures.model
22

33
import kotlinx.serialization.Contextual
44
import kotlinx.serialization.Serializable
5-
import kotlinx.serialization.Transient
65
import kotlinx.serialization.modules.SerializersModule
6+
import org.modelix.datastructures.list.LargeList
7+
import org.modelix.datastructures.list.LargeListConfig
8+
import org.modelix.datastructures.list.LargeListKSerializer
79
import org.modelix.datastructures.objects.IDataTypeConfiguration
810
import org.modelix.datastructures.objects.IObjectData
911
import org.modelix.datastructures.objects.IObjectDeserializer
12+
import org.modelix.datastructures.objects.IObjectGraph
1013
import org.modelix.datastructures.objects.IObjectReferenceFactory
1114
import org.modelix.datastructures.objects.LongDataTypeConfiguration
1215
import org.modelix.datastructures.objects.Object
1316
import org.modelix.datastructures.objects.ObjectReference
1417
import org.modelix.datastructures.objects.asKSerializer
18+
import org.modelix.datastructures.objects.getDescendantsAndSelf
1519
import org.modelix.datastructures.serialization.SplitJoinFormat
1620
import org.modelix.datastructures.serialization.TransformingSerializer
1721
import org.modelix.kotlin.utils.DelicateModelixApi
@@ -26,15 +30,14 @@ import org.modelix.model.api.NullChildLinkReference
2630
import org.modelix.model.api.meta.NullConcept
2731
import org.modelix.streams.IStream
2832

29-
@Serializable
3033
data class NodeObjectData<NodeId>(
31-
@Transient val deserializer: Deserializer<NodeId>? = null,
34+
val deserializer: Deserializer<NodeId>? = null,
3235
val id: NodeId,
3336
val concept: ConceptReference? = null,
3437
val containment: Pair<NodeId, IChildLinkReference>? = null,
35-
val children: List<NodeId> = emptyList(),
38+
val children: LargeList<NodeId>? = null,
3639
val properties: List<Pair<String, String>> = emptyList(),
37-
val references: List<Pair<String, @Contextual INodeReference>> = emptyList(),
40+
val references: List<Pair<String, INodeReference>> = emptyList(),
3841
) : IObjectData {
3942

4043
init {
@@ -45,6 +48,8 @@ data class NodeObjectData<NodeId>(
4548
val parentId: NodeId? get() = containment?.first
4649
val roleInParent: IChildLinkReference get() = containment?.second ?: NullChildLinkReference
4750

51+
fun getChildIds(): IStream.Many<NodeId> = children?.getElements() ?: IStream.empty()
52+
4853
override fun serialize(): String {
4954
return deserializer!!.serialFormat.encodeToString(deserializer.kSerializer, this)
5055
}
@@ -54,7 +59,7 @@ data class NodeObjectData<NodeId>(
5459
}
5560

5661
override fun getContainmentReferences(): List<ObjectReference<IObjectData>> {
57-
return emptyList()
62+
return children?.getContainmentReferences() ?: emptyList()
5863
}
5964

6065
fun getProperty(role: IPropertyReference): String? {
@@ -107,22 +112,29 @@ data class NodeObjectData<NodeId>(
107112
}
108113
}
109114

110-
fun withChildRemoved(childId: NodeId): NodeObjectData<NodeId> {
111-
return copy(children = children.filterNot { deserializer!!.nodeIdTypeConfig.equal(it, childId) })
115+
fun withChildRemoved(childId: NodeId): IStream.One<NodeObjectData<NodeId>> {
116+
return getChildIds().filter { !deserializer!!.nodeIdTypeConfig.equal(it, childId) }.toList().map { newChildren ->
117+
withChildren(newChildren)
118+
}
112119
}
113120

121+
fun withChildren(newChildren: List<NodeId>) = copy(children = deserializer!!.largeListConfig.createList(newChildren))
122+
114123
override fun objectDiff(self: Object<*>, oldObject: Object<*>?): IStream.Many<Object<*>> {
115-
return IStream.of(self)
124+
return self.getDescendantsAndSelf()
116125
}
117126

118127
class Deserializer<NodeId>(
128+
val graph: IObjectGraph,
119129
val nodeIdTypeConfig: IDataTypeConfiguration<NodeId>,
120130
val treeId: TreeId,
121131
) : IObjectDeserializer<NodeObjectData<NodeId>> {
122132
val referenceTypeConfig = LegacyNodeReferenceDataTypeConfig(treeId)
133+
val largeListConfig = LargeListConfig(graph, nodeIdTypeConfig)
123134
val serialFormat = SplitJoinFormat(
124135
SerializersModule {
125136
contextual(INodeReference::class, referenceTypeConfig.asKSerializer())
137+
contextual(LargeList::class) { LargeListKSerializer(largeListConfig) }
126138
},
127139
)
128140

@@ -142,7 +154,7 @@ data class NodeObjectData<NodeId>(
142154
concept = value.concept?.takeIf { it != NullConcept.getReference() },
143155
parent = encodeNullId(value.parentId),
144156
role = value.roleInParent.getIdOrNameOrNull(),
145-
children = value.children,
157+
children = value.children ?: largeListConfig.createEmptyList(),
146158
properties = value.properties.toMap(),
147159
references = value.references.toMap(),
148160
)
@@ -176,20 +188,23 @@ data class LegacyCompatibleFormat<NodeId, ReferenceType>(
176188
val concept: ConceptReference?,
177189
val parent: NodeId?,
178190
val role: String?,
179-
val children: List<NodeId>,
191+
@Contextual
192+
val children: LargeList<NodeId>,
180193
val properties: Map<String, String>,
181194
val references: Map<String, ReferenceType>,
182195
)
183196

184-
fun IReadableNode.toNodeObjectData(): NodeObjectData<INodeReference> {
197+
fun IReadableNode.toNodeObjectData(graph: IObjectGraph): NodeObjectData<INodeReference> {
198+
val nodeDataDeserializer = NodeObjectData.Deserializer(graph, NodeReferenceDataTypeConfig(), getTreeId())
199+
185200
// usage of getIdOrName: persist ID only to prevent ObjectHash changes when metamodel elements are renamed
186201
@OptIn(DelicateModelixApi::class)
187202
return NodeObjectData(
188-
deserializer = NodeObjectData.Deserializer(NodeReferenceDataTypeConfig(), getTreeId()),
203+
deserializer = nodeDataDeserializer,
189204
id = getNodeReference(),
190205
concept = getConceptReference(),
191206
containment = getParent()?.let { it.getNodeReference() to getContainmentLink() },
192-
children = getAllChildren().map { it.getNodeReference() },
207+
children = nodeDataDeserializer.largeListConfig.createList(getAllChildren().map { it.getNodeReference() }),
193208
properties = getAllProperties().map { it.first.getIdOrName() to it.second },
194209
references = getAllReferenceTargetRefs().map { it.first.getIdOrName() to it.second },
195210
)

model-datastructure/src/commonMain/kotlin/org/modelix/model/lazy/CLTree.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,15 @@ private fun createNewTreeData(
3737
useRoleIds: Boolean = true,
3838
): Object<CPTree> {
3939
val root = NodeObjectData<Long>(
40-
deserializer = NodeObjectData.Deserializer(LongDataTypeConfiguration(), treeId),
40+
deserializer = NodeObjectData.Deserializer(graph, LongDataTypeConfiguration(), treeId),
4141
id = ITree.ROOT_ID,
4242
concept = null,
4343
containment = null,
4444
)
4545
val config = HamtNode.Config(
4646
graph = graph,
4747
keyConfig = LongDataTypeConfiguration(),
48-
valueConfig = ObjectReferenceDataTypeConfiguration(graph, NodeObjectData.Deserializer(LongDataTypeConfiguration(), treeId)),
48+
valueConfig = ObjectReferenceDataTypeConfiguration(graph, NodeObjectData.Deserializer(graph, LongDataTypeConfiguration(), treeId)),
4949
)
5050
@OptIn(DelicateModelixApi::class) // this is a new object
5151
return CPTree(

0 commit comments

Comments
 (0)