Skip to content

Commit 0366e43

Browse files
author
Oleksandr Dzhychko
committed
perf(bulk-model-sync): use a memory efficient map for critical indices
Use trove4j's `THashMap` instead of Java's `HashMap` because it does not create entry objects for map items.
1 parent 0f1b630 commit 0366e43

File tree

6 files changed

+115
-11
lines changed

6 files changed

+115
-11
lines changed

build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ subprojects {
116116
if (!name.lowercase().contains("test")) {
117117
this.kotlinOptions {
118118
jvmTarget = "11"
119-
freeCompilerArgs += listOf("-Xjvm-default=all-compatibility")
119+
freeCompilerArgs += listOf("-Xjvm-default=all-compatibility", "-Xexpect-actual-classes")
120120
apiVersion = kotlinApiVersion.version
121121
}
122122
}

bulk-model-sync-lib/build.gradle.kts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ kotlin {
2121
}
2222
}
2323

24+
val jvmMain by getting {
25+
dependencies {
26+
implementation(libs.trove4j)
27+
}
28+
}
29+
2430
val commonTest by getting {
2531
dependencies {
2632
implementation(project(":model-api"))
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright (c) 2024.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modelix.model.sync.bulk
18+
19+
/**
20+
* Built-in maps like [HashMap] are not the most memory-efficient map implementations.
21+
* A common issue is that entry objects for every item in the table are created.
22+
* [MemoryEfficientMap] is an internal implementation that we can use
23+
* when the memory overhead becomes too big.
24+
*
25+
* The Java implementation is optimized to not create entry objects by using a map implementation from another library.
26+
* The JS implementation is not optimized yet because we did not invest time in finding a suitable library.
27+
*
28+
* [MemoryEfficientMap] is an internal abstraction.
29+
* The API is therefore kept minimal.
30+
*/
31+
expect class MemoryEfficientMap<KeyT, ValueT>() {
32+
operator fun set(key: KeyT, value: ValueT)
33+
operator fun get(key: KeyT): ValueT?
34+
}

bulk-model-sync-lib/src/commonMain/kotlin/org/modelix/model/sync/bulk/ModelImporter.kt

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@ class ModelImporter(
4848
private val continueOnError: Boolean,
4949
private val childFilter: (INode) -> Boolean = { true },
5050
) {
51-
52-
private val originalIdToExisting: MutableMap<String, INode> = mutableMapOf()
51+
// We have seen imports where `originalIdToExisting` had tens of millions of entries.
52+
// Therefore, choose a map that is optimized for memory usage.
53+
private var originalIdToExisting = MemoryEfficientMap<String, INode>()
5354
private val postponedReferences = mutableListOf<PostponedReference>()
5455
private val nodesToRemove = HashSet<INode>()
5556
private var numExpectedNodes = 0
@@ -99,7 +100,7 @@ class ModelImporter(
99100
INodeResolutionScope.runWithAdditionalScope(root.getArea()) {
100101
logImportSize(data.root, logger)
101102
logger.info { "Building indices for import..." }
102-
originalIdToExisting.clear()
103+
originalIdToExisting = MemoryEfficientMap()
103104
postponedReferences.clear()
104105
nodesToRemove.clear()
105106
numExpectedNodes = countExpectedNodes(data.root)
@@ -145,15 +146,25 @@ class ModelImporter(
145146
for (role in allRoles) {
146147
val expectedNodes = expectedParent.children.filter { it.role == role }
147148
val existingNodes = existingParent.getChildren(role).filter(childFilter).toList()
149+
val allExpectedNodesDoNotExist by lazy {
150+
expectedNodes.all { expectedNode ->
151+
val originalId = expectedNode.originalId()
152+
checkNotNull(originalId) { "Specified node '$expectedNode' has no ID." }
153+
originalIdToExisting[originalId] == null
154+
}
155+
}
148156

149157
// optimization that uses the bulk operation .addNewChildren
150-
if (existingNodes.isEmpty() && expectedNodes.all { originalIdToExisting[it.originalId()] == null }) {
151-
existingParent.addNewChildren(role, -1, expectedNodes.map { it.concept?.let { ConceptReference(it) } }).zip(expectedNodes).forEach { (newChild, expected) ->
152-
val expectedId = checkNotNull(expected.originalId()) { "Specified node '$expected' has no id" }
153-
newChild.setPropertyValue(NodeData.idPropertyKey, expectedId)
154-
originalIdToExisting[expectedId] = newChild
155-
syncNode(newChild, expected, progressReporter)
156-
}
158+
if (existingNodes.isEmpty() && allExpectedNodesDoNotExist) {
159+
existingParent.addNewChildren(role, -1, expectedNodes.map { it.concept?.let { ConceptReference(it) } })
160+
.zip(expectedNodes)
161+
.forEach { (newChild, expected) ->
162+
val expectedId = expected.originalId()
163+
checkNotNull(expectedId) { "Specified node '$expected' has no ID." }
164+
newChild.setPropertyValue(NodeData.idPropertyKey, expectedId)
165+
originalIdToExisting[expectedId] = newChild
166+
syncNode(newChild, expected, progressReporter)
167+
}
157168
continue
158169
}
159170

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Copyright (c) 2024.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modelix.model.sync.bulk
18+
19+
actual class MemoryEfficientMap<KeyT, ValueT> {
20+
private val map: MutableMap<KeyT, ValueT> = mutableMapOf()
21+
22+
actual operator fun set(key: KeyT, value: ValueT) = map.set(key, value)
23+
24+
actual operator fun get(key: KeyT) = map[key]
25+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Copyright (c) 2024.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modelix.model.sync.bulk
18+
19+
import gnu.trove.map.TMap
20+
import gnu.trove.map.hash.THashMap
21+
22+
actual class MemoryEfficientMap<KeyT, ValueT> {
23+
private val map: TMap<KeyT, ValueT> = THashMap()
24+
25+
actual operator fun set(key: KeyT, value: ValueT) = map.set(key, value)
26+
27+
actual operator fun get(key: KeyT) = map[key]
28+
}

0 commit comments

Comments
 (0)