Skip to content

Commit da90ba8

Browse files
author
Oleksandr Dzhychko
authored
Merge pull request #550 from modelix/perf/reduce-memory-usage-for-import-into-mps
reduce memory usage for import into mps
2 parents 3c577b4 + 59cf077 commit da90ba8

File tree

9 files changed

+230
-60
lines changed

9 files changed

+230
-60
lines changed

build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ subprojects {
116116
if (!name.lowercase().contains("test")) {
117117
this.kotlinOptions {
118118
jvmTarget = "11"
119-
freeCompilerArgs += listOf("-Xjvm-default=all-compatibility")
119+
freeCompilerArgs += listOf("-Xjvm-default=all-compatibility", "-Xexpect-actual-classes")
120120
apiVersion = kotlinApiVersion.version
121121
}
122122
}

bulk-model-sync-lib/build.gradle.kts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ kotlin {
2121
}
2222
}
2323

24+
val jvmMain by getting {
25+
dependencies {
26+
implementation(libs.trove4j)
27+
}
28+
}
29+
2430
val commonTest by getting {
2531
dependencies {
2632
implementation(project(":model-api"))
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright (c) 2024.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modelix.model.sync.bulk
18+
19+
/**
20+
* Built-in maps like [HashMap] are not the most memory-efficient map implementations.
21+
* A common issue is that entry objects for every item in the table are created.
22+
* [MemoryEfficientMap] is an internal implementation that we can use
23+
* when the memory overhead becomes too big.
24+
*
25+
* The JVM implementation avoids creating entry objects by using a map implementation from another library.
26+
* The JS implementation is not optimized yet because we did not invest time in finding a suitable library.
27+
*
28+
* [MemoryEfficientMap] is an internal abstraction.
29+
* The API is therefore kept minimal.
30+
*/
31+
expect class MemoryEfficientMap<KeyT, ValueT>() {
32+
operator fun set(key: KeyT, value: ValueT)
33+
operator fun get(key: KeyT): ValueT?
34+
}

bulk-model-sync-lib/src/commonMain/kotlin/org/modelix/model/sync/bulk/ModelImporter.kt

Lines changed: 82 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import org.modelix.model.api.SerializedNodeReference
2525
import org.modelix.model.api.getDescendants
2626
import org.modelix.model.api.isChildRoleOrdered
2727
import org.modelix.model.api.remove
28+
import org.modelix.model.api.resolveInCurrentContext
2829
import org.modelix.model.data.ModelData
2930
import org.modelix.model.data.NodeData
3031
import kotlin.jvm.JvmName
@@ -48,11 +49,17 @@ class ModelImporter(
4849
private val continueOnError: Boolean,
4950
private val childFilter: (INode) -> Boolean = { true },
5051
) {
52+
// We have seen imports where `originalIdToExisting` had on the order of ten million entries.
53+
// Therefore, choose a map that is optimized for memory usage.
54+
// For the same reason store `INodeReference`s instead of `INode`s.
55+
// In the few cases where we need the `INode`, we can resolve it from the reference.
56+
private val originalIdToExisting by lazy(::buildExistingIndex)
5157

52-
private val originalIdToExisting: MutableMap<String, INode> = mutableMapOf()
58+
// Use `INode` instead of `INodeReference` in `postponedReferences` and `nodesToRemove`
59+
// because we know that we will always need the `INode`s in those cases.
60+
// Those cases are deleting nodes and adding references to nodes.
5361
private val postponedReferences = mutableListOf<PostponedReference>()
5462
private val nodesToRemove = HashSet<INode>()
55-
private var numExpectedNodes = 0
5663
private var currentNodeProgress = 0
5764
private val logger = KotlinLogging.logger {}
5865

@@ -96,34 +103,51 @@ class ModelImporter(
96103
*/
97104
@JvmName("importData")
98105
fun import(data: ModelData) {
99-
INodeResolutionScope.runWithAdditionalScope(root.getArea()) {
100-
logImportSize(data.root, logger)
101-
logger.info { "Building indices for import..." }
102-
originalIdToExisting.clear()
103-
postponedReferences.clear()
104-
nodesToRemove.clear()
105-
numExpectedNodes = countExpectedNodes(data.root)
106-
val progressReporter = ProgressReporter(numExpectedNodes.toULong(), logger)
107-
currentNodeProgress = 0
108-
buildExistingIndex(root)
106+
importIntoNodes(sequenceOf(ExistingAndExpectedNode(root, data)))
107+
}
109108

110-
logger.info { "Importing nodes..." }
111-
data.root.originalId()?.let { originalIdToExisting[it] = root }
112-
syncNode(root, data.root, progressReporter)
109+
/**
110+
* Incrementally updates each given existing node with its specified expected data.
111+
*
112+
* @param nodeCombinationsToImport Combinations of an old existing child and the new expected data.
113+
* The combinations are consumed lazily.
114+
* Callers can use this to load expected data on demand.
115+
*/
116+
fun importIntoNodes(nodeCombinationsToImport: Sequence<ExistingAndExpectedNode>) {
117+
logger.info { "Building indices for import..." }
118+
postponedReferences.clear()
119+
nodesToRemove.clear()
113120

114-
logger.info { "Synchronizing references..." }
115-
postponedReferences.forEach { it.setPostponedReference() }
121+
nodeCombinationsToImport.forEach { nodeCombination ->
122+
importIntoNode(nodeCombination.expectedNodeData, nodeCombination.existingNode)
123+
}
116124

117-
logger.info { "Removing extra nodes..." }
118-
nodesToRemove.forEach {
119-
doAndPotentiallyContinueOnErrors {
120-
if (it.isValid) { // if it's invalid then it's already removed
121-
it.remove()
122-
}
125+
logger.info { "Synchronizing references..." }
126+
postponedReferences.forEach { it.setPostponedReference() }
127+
128+
logger.info { "Removing extra nodes..." }
129+
nodesToRemove.forEach {
130+
doAndPotentiallyContinueOnErrors {
131+
if (it.isValid) { // if it's invalid then it's already removed
132+
it.remove()
123133
}
124134
}
135+
}
125136

126-
logger.info { "Synchronization finished." }
137+
logger.info { "Synchronization finished." }
138+
}
139+
140+
private fun importIntoNode(expectedNodeData: ModelData, existingNode: INode = root) {
141+
INodeResolutionScope.runWithAdditionalScope(existingNode.getArea()) {
142+
logImportSize(expectedNodeData.root, logger)
143+
logger.info { "Building indices for nodes import..." }
144+
currentNodeProgress = 0
145+
val numExpectedNodes = countExpectedNodes(expectedNodeData.root)
146+
val progressReporter = ProgressReporter(numExpectedNodes.toULong(), logger)
147+
148+
logger.info { "Importing nodes..." }
149+
expectedNodeData.root.originalId()?.let { originalIdToExisting[it] = existingNode.reference }
150+
syncNode(existingNode, expectedNodeData.root, progressReporter)
127151
}
128152
}
129153

@@ -145,15 +169,25 @@ class ModelImporter(
145169
for (role in allRoles) {
146170
val expectedNodes = expectedParent.children.filter { it.role == role }
147171
val existingNodes = existingParent.getChildren(role).filter(childFilter).toList()
172+
val allExpectedNodesDoNotExist by lazy {
173+
expectedNodes.all { expectedNode ->
174+
val originalId = expectedNode.originalId()
175+
checkNotNull(originalId) { "Specified node '$expectedNode' has no ID." }
176+
originalIdToExisting[originalId] == null
177+
}
178+
}
148179

149180
// optimization that uses the bulk operation .addNewChildren
150-
if (existingNodes.isEmpty() && expectedNodes.all { originalIdToExisting[it.originalId()] == null }) {
151-
existingParent.addNewChildren(role, -1, expectedNodes.map { it.concept?.let { ConceptReference(it) } }).zip(expectedNodes).forEach { (newChild, expected) ->
152-
val expectedId = checkNotNull(expected.originalId()) { "Specified node '$expected' has no id" }
153-
newChild.setPropertyValue(NodeData.idPropertyKey, expectedId)
154-
originalIdToExisting[expectedId] = newChild
155-
syncNode(newChild, expected, progressReporter)
156-
}
181+
if (existingNodes.isEmpty() && allExpectedNodesDoNotExist) {
182+
existingParent.addNewChildren(role, -1, expectedNodes.map { it.concept?.let { ConceptReference(it) } })
183+
.zip(expectedNodes)
184+
.forEach { (newChild, expected) ->
185+
val expectedId = expected.originalId()
186+
checkNotNull(expectedId) { "Specified node '$expected' has no ID." }
187+
newChild.setPropertyValue(NodeData.idPropertyKey, expectedId)
188+
originalIdToExisting[expectedId] = newChild.reference
189+
syncNode(newChild, expected, progressReporter)
190+
}
157191
continue
158192
}
159193

@@ -186,13 +220,18 @@ class ModelImporter(
186220
val nodeAtIndex = existingChildren.getOrNull(newIndex)
187221
val expectedConcept = expected.concept?.let { s -> ConceptReference(s) }
188222
val childNode = if (nodeAtIndex?.originalId() != expectedId) {
189-
val existingNode = originalIdToExisting[expectedId]
190-
if (existingNode == null) {
223+
val existingNodeReference = originalIdToExisting[expectedId]
224+
if (existingNodeReference == null) {
191225
val newChild = existingParent.addNewChild(role, newIndex, expectedConcept)
192226
newChild.setPropertyValue(NodeData.idPropertyKey, expectedId)
193-
originalIdToExisting[expectedId] = newChild
227+
originalIdToExisting[expectedId] = newChild.reference
194228
newChild
195229
} else {
230+
val existingNode = existingNodeReference.resolveInCurrentContext()
231+
checkNotNull(existingNode) {
232+
// This reference should always be resolvable because the node existed or was created before.
233+
"Could not resolve $existingNodeReference."
234+
}
196235
// The existing child node is not only moved to a new index,
197236
// it is potentially moved to a new parent and role.
198237
existingParent.moveChild(role, newIndex, existingNode)
@@ -217,10 +256,12 @@ class ModelImporter(
217256
}
218257
}
219258

220-
private fun buildExistingIndex(root: INode) {
259+
private fun buildExistingIndex(): MemoryEfficientMap<String, INodeReference> {
260+
val localOriginalIdToExisting = MemoryEfficientMap<String, INodeReference>()
221261
root.getDescendants(true).forEach { node ->
222-
node.originalId()?.let { originalIdToExisting[it] = node }
262+
node.originalId()?.let { localOriginalIdToExisting[it] = node.reference }
223263
}
264+
return localOriginalIdToExisting
224265
}
225266

226267
private fun syncProperties(node: INode, nodeData: NodeData) {
@@ -268,3 +309,8 @@ internal fun INode.originalId(): String? {
268309
internal fun NodeData.originalId(): String? {
269310
return properties[NodeData.idPropertyKey] ?: id
270311
}
312+
313+
data class ExistingAndExpectedNode(
314+
val existingNode: INode,
315+
val expectedNodeData: ModelData,
316+
)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Copyright (c) 2024.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modelix.model.sync.bulk
18+
19+
actual class MemoryEfficientMap<KeyT, ValueT> {
20+
private val map: MutableMap<KeyT, ValueT> = mutableMapOf()
21+
22+
actual operator fun set(key: KeyT, value: ValueT) = map.set(key, value)
23+
24+
actual operator fun get(key: KeyT) = map[key]
25+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Copyright (c) 2024.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modelix.model.sync.bulk
18+
19+
import gnu.trove.map.TMap
20+
import gnu.trove.map.hash.THashMap
21+
22+
actual class MemoryEfficientMap<KeyT, ValueT> {
23+
private val map: TMap<KeyT, ValueT> = THashMap()
24+
25+
actual operator fun set(key: KeyT, value: ValueT) = map.set(key, value)
26+
27+
actual operator fun get(key: KeyT) = map[key]
28+
}

bulk-model-sync-mps/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies {
2121

2222
implementation(kotlin("stdlib"))
2323
implementation(libs.kotlin.logging)
24+
implementation(libs.kotlin.serialization.json)
2425
}
2526

2627
publishing {

bulk-model-sync-mps/src/main/kotlin/org/modelix/mps/model/sync/bulk/MPSBulkSynchronizer.kt

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,17 @@ package org.modelix.mps.model.sync.bulk
1919
import com.intellij.openapi.application.ApplicationManager
2020
import com.intellij.openapi.project.ProjectManager
2121
import jetbrains.mps.ide.project.ProjectHelper
22+
import kotlinx.serialization.ExperimentalSerializationApi
23+
import kotlinx.serialization.json.Json
24+
import kotlinx.serialization.json.decodeFromStream
25+
import org.jetbrains.mps.openapi.module.SModule
2226
import org.jetbrains.mps.openapi.module.SRepository
23-
import org.modelix.model.api.BuiltinLanguages
24-
import org.modelix.model.api.INode
27+
import org.modelix.model.data.ModelData
2528
import org.modelix.model.mpsadapters.MPSModuleAsNode
2629
import org.modelix.model.mpsadapters.MPSRepositoryAsNode
30+
import org.modelix.model.sync.bulk.ExistingAndExpectedNode
2731
import org.modelix.model.sync.bulk.ModelExporter
2832
import org.modelix.model.sync.bulk.ModelImporter
29-
import org.modelix.model.sync.bulk.importFilesAsRootChildren
3033
import org.modelix.model.sync.bulk.isModuleIncluded
3134
import java.io.File
3235
import java.util.concurrent.atomic.AtomicInteger
@@ -37,7 +40,8 @@ object MPSBulkSynchronizer {
3740
fun exportRepository() {
3841
val repository = getRepository()
3942
val includedModuleNames = parseRawPropertySet(System.getProperty("modelix.mps.model.sync.bulk.output.modules"))
40-
val includedModulePrefixes = parseRawPropertySet(System.getProperty("modelix.mps.model.sync.bulk.output.modules.prefixes"))
43+
val includedModulePrefixes =
44+
parseRawPropertySet(System.getProperty("modelix.mps.model.sync.bulk.output.modules.prefixes"))
4145

4246
repository.modelAccess.runReadAction {
4347
val allModules = repository.modules
@@ -62,6 +66,7 @@ object MPSBulkSynchronizer {
6266
}
6367
}
6468

69+
@OptIn(ExperimentalSerializationApi::class)
6570
@JvmStatic
6671
fun importRepository() {
6772
val repository = getRepository()
@@ -70,26 +75,44 @@ object MPSBulkSynchronizer {
7075
val inputPath = System.getProperty("modelix.mps.model.sync.bulk.input.path")
7176
val continueOnError = System.getProperty("modelix.mps.model.sync.bulk.input.continueOnError", "false").toBoolean()
7277
val jsonFiles = File(inputPath).listFiles()?.filter {
73-
it.extension == "json" && isModuleIncluded(it.nameWithoutExtension, includedModuleNames, includedModulePrefixes)
78+
it.extension == "json" && isModuleIncluded(
79+
it.nameWithoutExtension,
80+
includedModuleNames,
81+
includedModulePrefixes,
82+
)
7483
}
7584

7685
if (jsonFiles.isNullOrEmpty()) error("no json files found for included modules")
7786

7887
println("Found ${jsonFiles.size} modules to be imported")
7988
val access = repository.modelAccess
8089
access.runWriteInEDT {
90+
val allModules = repository.modules
91+
val includedModules: Iterable<SModule> = allModules.filter {
92+
isModuleIncluded(it.moduleName!!, includedModuleNames, includedModulePrefixes)
93+
}
94+
val numIncludedModules = includedModules.count()
8195
access.executeCommand {
8296
val repoAsNode = MPSRepositoryAsNode(repository)
83-
84-
// Without the filter MPS would attempt to delete all modules that are not included
85-
fun moduleFilter(node: INode): Boolean {
86-
if (node.getConceptReference()?.getUID() != BuiltinLanguages.MPSRepositoryConcepts.Module.getUID()) return true
87-
val moduleName = node.getPropertyValue(BuiltinLanguages.jetbrains_mps_lang_core.INamedConcept.name) ?: return false
88-
return isModuleIncluded(moduleName, includedModuleNames, includedModulePrefixes)
89-
}
9097
println("Importing modules...")
9198
try {
92-
ModelImporter(repoAsNode, continueOnError, childFilter = ::moduleFilter).importFilesAsRootChildren(jsonFiles)
99+
println("Importing modules...")
100+
// `modulesToImport` lazily produces modules to import
101+
// so that loaded model data can be garbage collected.
102+
val modulesToImport = includedModules.asSequence().flatMapIndexed { index, module ->
103+
println("Importing module ${index + 1} of $numIncludedModules: '${module.moduleName}'")
104+
val fileName = inputPath + File.separator + module.moduleName + ".json"
105+
val moduleFile = File(fileName)
106+
if (moduleFile.exists()) {
107+
val expectedData: ModelData = moduleFile.inputStream().use(Json::decodeFromStream)
108+
sequenceOf(ExistingAndExpectedNode(MPSModuleAsNode(module), expectedData))
109+
} else {
110+
println("Skip importing ${module.moduleName}} because $fileName does not exist.")
111+
sequenceOf()
112+
}
113+
}
114+
ModelImporter(repoAsNode, continueOnError).importIntoNodes(modulesToImport)
115+
println("Import finished.")
93116
} catch (ex: Exception) {
94117
// Exceptions are only visible in the MPS log file by default
95118
ex.printStackTrace()

0 commit comments

Comments
 (0)