From e62f85b1455dcd8979fb7a275c90efc108737000 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 18 Jan 2023 17:52:56 +0530 Subject: [PATCH 01/31] Stored groups to avoid recomputation. Brings down held task computation time to 15% of earlier ( 6X speedup ) --- .../queryengine/HeldTaskCompletion.scala | 82 +++++++++++++++++-- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index f0b0cf5fc9e1..d424448c3300 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -1,5 +1,7 @@ package io.joern.dataflowengineoss.queryengine +import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} + import scala.collection.mutable import scala.collection.parallel.CollectionConverters._ @@ -18,7 +20,7 @@ import scala.collection.parallel.CollectionConverters._ */ class HeldTaskCompletion( heldTasks: List[ReachableByTask], - resultTable: mutable.Map[TaskFingerprint, List[TableEntry]] + resultTable: mutable.Map[TaskFingerprint, List[TableEntry]], ) { /** Add results produced by held task until no more change can be observed. @@ -46,6 +48,7 @@ class HeldTaskCompletion( def noneChanged = toProcess.map { t => t.fingerprint -> false }.toMap var changed: Map[TaskFingerprint, Boolean] = allChanged + val groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]] = mutable.Map() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -61,7 +64,7 @@ class HeldTaskCompletion( changed = noneChanged taskResultsPairs.foreach { case (t, resultsForTask, newResults) => - addCompletedTasksToMainTable(newResults.toList) + addCompletedTasksToMainTable(newResults.toList, groupMap) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } @@ -117,11 +120,40 @@ class HeldTaskCompletion( pathSeq.distinct.size == pathSeq.size } - private def addCompletedTasksToMainTable(results: List[(TaskFingerprint, TableEntry)]): Unit = { + private def addCompletedTasksToMainTable(results: List[(TaskFingerprint, TableEntry)], + groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], + Boolean), + (CfgNode, List[Call], Boolean)), List[TableEntry]]], + ): Unit = { results.groupBy(_._1).foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) val old = resultTable.getOrElse(fingerprint, Vector()).toList - resultTable.put(fingerprint, deduplicateTableEntries(old ++ entries)) + + val toGroups = groupMap.getOrElse( fingerprint, null ) + + val fromGroups = entries + .groupBy { result => + val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + (head, last) + } + + val groups = if( toGroups != null ){ + fromGroups ++ toGroups + }else { + val toGroupsNew = old + .groupBy { result => + val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + (head, last) + } + fromGroups ++ toGroupsNew + } + + val mergedList = getListFromGroups(groups) + resultTable.put(fingerprint, mergedList) + groupMap.update(fingerprint, groups) + //println(s"Group insert: fingerprint: ${fingerprint}, Fingerprint hash: ${fingerprint.hashCode()}, group count: ${groups.size}") } } @@ -141,7 +173,7 @@ class HeldTaskCompletion( * flows with maximum length, then we compute a string representation of the flows - taking into account all fields * - and select the flow with maximum length that is smallest in terms of this string representation. */ - private def deduplicateTableEntries(list: List[TableEntry]): List[TableEntry] = { + private def deduplicateTableEntries(list: List[TableEntry]): List[TableEntry]= { list .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get @@ -168,4 +200,44 @@ class HeldTaskCompletion( .toList } + private def mergeGroups(into: List[TableEntry], from: List[TableEntry]): Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]] = { + val toGroups = into + .groupBy { result => + val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + (head, last) + } + + val fromGroups = from + .groupBy { result => + val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + (head, last) + } + + toGroups ++ fromGroups + } + + private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]): List[TableEntry] = { + val mapped = groups.map { case (_, list) => + val lenIdPathPairs = list.map(x => (x.path.length, x)) + val withMaxLength = (lenIdPathPairs.sortBy(_._1).reverse match { + case Nil => Nil + case h :: t => h :: t.takeWhile(y => y._1 == h._1) + }).map(_._2) + + if (withMaxLength.length == 1) { + withMaxLength.head + } else { + withMaxLength.minBy { x => + x.path + .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) + .mkString("-") + } + } + } + mapped.toList + } + + } From 31f968e3ca733c4c10e821f590adb1a32ec88f16 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 18 Jan 2023 18:26:05 +0530 Subject: [PATCH 02/31] Code betterment --- .../queryengine/HeldTaskCompletion.scala | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index d424448c3300..8a94b951739d 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -2,6 +2,7 @@ package io.joern.dataflowengineoss.queryengine import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} +import java.util.concurrent.locks.ReentrantLock import scala.collection.mutable import scala.collection.parallel.CollectionConverters._ @@ -127,10 +128,6 @@ class HeldTaskCompletion( ): Unit = { results.groupBy(_._1).foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) - val old = resultTable.getOrElse(fingerprint, Vector()).toList - - val toGroups = groupMap.getOrElse( fingerprint, null ) - val fromGroups = entries .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get @@ -138,22 +135,19 @@ class HeldTaskCompletion( (head, last) } - val groups = if( toGroups != null ){ - fromGroups ++ toGroups - }else { - val toGroupsNew = old - .groupBy { result => - val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get - val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get - (head, last) - } - fromGroups ++ toGroupsNew - } + val old = resultTable.getOrElse(fingerprint, Vector()).toList + val toGroups = groupMap.getOrElse(fingerprint, old + .groupBy { result => + val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + (head, last) + }) + val groups = fromGroups ++ toGroups val mergedList = getListFromGroups(groups) + resultTable.put(fingerprint, mergedList) groupMap.update(fingerprint, groups) - //println(s"Group insert: fingerprint: ${fingerprint}, Fingerprint hash: ${fingerprint.hashCode()}, group count: ${groups.size}") } } From ecc40e8282d909d7f8685240e573bf84457602e2 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 18 Jan 2023 20:09:34 +0530 Subject: [PATCH 03/31] Removed unused function --- .../queryengine/HeldTaskCompletion.scala | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 8a94b951739d..3fde11033bdb 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -194,24 +194,6 @@ class HeldTaskCompletion( .toList } - private def mergeGroups(into: List[TableEntry], from: List[TableEntry]): Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]] = { - val toGroups = into - .groupBy { result => - val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get - val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get - (head, last) - } - - val fromGroups = from - .groupBy { result => - val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get - val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get - (head, last) - } - - toGroups ++ fromGroups - } - private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]): List[TableEntry] = { val mapped = groups.map { case (_, list) => val lenIdPathPairs = list.map(x => (x.path.length, x)) From e6b18a8bc692bf4deab88baabdb56c413b44542a Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Fri, 20 Jan 2023 13:58:35 +0530 Subject: [PATCH 04/31] More compact code for old --- .../dataflowengineoss/queryengine/HeldTaskCompletion.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 3fde11033bdb..844f22f890d6 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -135,8 +135,7 @@ class HeldTaskCompletion( (head, last) } - val old = resultTable.getOrElse(fingerprint, Vector()).toList - val toGroups = groupMap.getOrElse(fingerprint, old + val toGroups = groupMap.getOrElse(fingerprint, resultTable.getOrElse(fingerprint, Vector()).toList .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get From a7b9aea318d6a0f6ecd2dafbf51a1b81bc96e290 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Fri, 20 Jan 2023 14:21:04 +0530 Subject: [PATCH 05/31] Revert "More compact code for old" This reverts commit e6b18a8bc692bf4deab88baabdb56c413b44542a. --- .../dataflowengineoss/queryengine/HeldTaskCompletion.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 844f22f890d6..3fde11033bdb 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -135,7 +135,8 @@ class HeldTaskCompletion( (head, last) } - val toGroups = groupMap.getOrElse(fingerprint, resultTable.getOrElse(fingerprint, Vector()).toList + val old = resultTable.getOrElse(fingerprint, Vector()).toList + val toGroups = groupMap.getOrElse(fingerprint, old .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get From 0269925f68d15ff782db273c85c7f80916118d63 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Fri, 20 Jan 2023 18:12:37 +0530 Subject: [PATCH 06/31] Cache table entry list --- .../queryengine/HeldTaskCompletion.scala | 56 +++++++++++-------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 3fde11033bdb..01825a461915 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -2,9 +2,9 @@ package io.joern.dataflowengineoss.queryengine import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} -import java.util.concurrent.locks.ReentrantLock import scala.collection.mutable import scala.collection.parallel.CollectionConverters._ +import scala.language.postfixOps /** Complete held tasks using the result table. The result table is modified in the process. * @@ -50,6 +50,7 @@ class HeldTaskCompletion( var changed: Map[TaskFingerprint, Boolean] = allChanged val groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]] = mutable.Map() + val mergedListMap: mutable.Map[TaskFingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]] = mutable.Map() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -65,7 +66,7 @@ class HeldTaskCompletion( changed = noneChanged taskResultsPairs.foreach { case (t, resultsForTask, newResults) => - addCompletedTasksToMainTable(newResults.toList, groupMap) + addCompletedTasksToMainTable(newResults.toList, groupMap, mergedListMap) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } @@ -125,10 +126,11 @@ class HeldTaskCompletion( groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]], + mergedListMap: mutable.Map[TaskFingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]] ): Unit = { results.groupBy(_._1).foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) - val fromGroups = entries + val newGroups = entries .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get @@ -136,18 +138,19 @@ class HeldTaskCompletion( } val old = resultTable.getOrElse(fingerprint, Vector()).toList - val toGroups = groupMap.getOrElse(fingerprint, old + val oldGroups = groupMap.getOrElse(fingerprint, old .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get (head, last) }) - val groups = fromGroups ++ toGroups - val mergedList = getListFromGroups(groups) + val groupListMap = mergedListMap.getOrElse(fingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]()) + val mergedGroups = newGroups ++ oldGroups + val mergedList = getListFromGroups(mergedGroups, groupListMap) resultTable.put(fingerprint, mergedList) - groupMap.update(fingerprint, groups) + groupMap.update(fingerprint, mergedGroups) } } @@ -194,26 +197,35 @@ class HeldTaskCompletion( .toList } - private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]): List[TableEntry] = { - val mapped = groups.map { case (_, list) => - val lenIdPathPairs = list.map(x => (x.path.length, x)) - val withMaxLength = (lenIdPathPairs.sortBy(_._1).reverse match { - case Nil => Nil - case h :: t => h :: t.takeWhile(y => y._1 == h._1) - }).map(_._2) + private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], + groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + ): List[TableEntry] = { + val mapped = groups.map { case (key, list) => + val tableEntry = groupListMap.getOrElse(key, null) - if (withMaxLength.length == 1) { - withMaxLength.head + if (tableEntry != null) { + tableEntry } else { - withMaxLength.minBy { x => - x.path - .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) - .mkString("-") + val lenIdPathPairs = list.map(x => (x.path.length, x)) + val withMaxLength = (lenIdPathPairs.sortBy(_._1).reverse match { + case Nil => Nil + case h :: t => h :: t.takeWhile(y => y._1 == h._1) + }).map(_._2) + + if (withMaxLength.length == 1) { + groupListMap.update( key, withMaxLength.head) + withMaxLength.head + } else { + val tableEntry = withMaxLength.minBy { x => + x.path + .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) + .mkString("-") + } + groupListMap.update( key, tableEntry ) + tableEntry } } } mapped.toList } - - } From e9b24da1df674e93708b0f8ade447734c4dfa15f Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Fri, 20 Jan 2023 19:29:20 +0530 Subject: [PATCH 07/31] Formatted the code --- .../queryengine/HeldTaskCompletion.scala | 69 ++++++++++++------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 01825a461915..ab39d5d3a788 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -21,7 +21,7 @@ import scala.language.postfixOps */ class HeldTaskCompletion( heldTasks: List[ReachableByTask], - resultTable: mutable.Map[TaskFingerprint, List[TableEntry]], + resultTable: mutable.Map[TaskFingerprint, List[TableEntry]] ) { /** Add results produced by held task until no more change can be observed. @@ -49,8 +49,14 @@ class HeldTaskCompletion( def noneChanged = toProcess.map { t => t.fingerprint -> false }.toMap var changed: Map[TaskFingerprint, Boolean] = allChanged - val groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]] = mutable.Map() - val mergedListMap: mutable.Map[TaskFingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]] = mutable.Map() + val groupMap + : mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ + TableEntry + ]]] = mutable.Map() + val mergedListMap: mutable.Map[TaskFingerprint, mutable.Map[ + ((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), + TableEntry + ]] = mutable.Map() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -122,32 +128,42 @@ class HeldTaskCompletion( pathSeq.distinct.size == pathSeq.size } - private def addCompletedTasksToMainTable(results: List[(TaskFingerprint, TableEntry)], - groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], - Boolean), - (CfgNode, List[Call], Boolean)), List[TableEntry]]], - mergedListMap: mutable.Map[TaskFingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]] + private def addCompletedTasksToMainTable( + results: List[(TaskFingerprint, TableEntry)], + groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ + TableEntry + ]]], + mergedListMap: mutable.Map[ + TaskFingerprint, + mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + ] ): Unit = { results.groupBy(_._1).foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) val newGroups = entries - .groupBy { result => - val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get - val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get - (head, last) - } - - val old = resultTable.getOrElse(fingerprint, Vector()).toList - val oldGroups = groupMap.getOrElse(fingerprint, old .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get (head, last) - }) + } - val groupListMap = mergedListMap.getOrElse(fingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]()) + val old = resultTable.getOrElse(fingerprint, Vector()).toList + val oldGroups = groupMap.getOrElse( + fingerprint, + old + .groupBy { result => + val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + val last = result.path.lastOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get + (head, last) + } + ) + + val groupListMap = mergedListMap.getOrElse( + fingerprint, + mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() + ) val mergedGroups = newGroups ++ oldGroups - val mergedList = getListFromGroups(mergedGroups, groupListMap) + val mergedList = getListFromGroups(mergedGroups, groupListMap) resultTable.put(fingerprint, mergedList) groupMap.update(fingerprint, mergedGroups) @@ -170,7 +186,7 @@ class HeldTaskCompletion( * flows with maximum length, then we compute a string representation of the flows - taking into account all fields * - and select the flow with maximum length that is smallest in terms of this string representation. */ - private def deduplicateTableEntries(list: List[TableEntry]): List[TableEntry]= { + private def deduplicateTableEntries(list: List[TableEntry]): List[TableEntry] = { list .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get @@ -197,9 +213,10 @@ class HeldTaskCompletion( .toList } - private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], - groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] - ): List[TableEntry] = { + private def getListFromGroups( + groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], + groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + ): List[TableEntry] = { val mapped = groups.map { case (key, list) => val tableEntry = groupListMap.getOrElse(key, null) @@ -208,12 +225,12 @@ class HeldTaskCompletion( } else { val lenIdPathPairs = list.map(x => (x.path.length, x)) val withMaxLength = (lenIdPathPairs.sortBy(_._1).reverse match { - case Nil => Nil + case Nil => Nil case h :: t => h :: t.takeWhile(y => y._1 == h._1) }).map(_._2) if (withMaxLength.length == 1) { - groupListMap.update( key, withMaxLength.head) + groupListMap.update(key, withMaxLength.head) withMaxLength.head } else { val tableEntry = withMaxLength.minBy { x => @@ -221,7 +238,7 @@ class HeldTaskCompletion( .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) .mkString("-") } - groupListMap.update( key, tableEntry ) + groupListMap.update(key, tableEntry) tableEntry } } From 42daee6bd821693a70f2b239ce05b2356fe9ad93 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 25 Jan 2023 18:23:51 +0530 Subject: [PATCH 08/31] 1. Properly concatenated the lists to avoid missing TableEntries 2. Removed caching for merge list since the new table entries would need a recomputation --- .../queryengine/HeldTaskCompletion.scala | 61 ++++++------------- 1 file changed, 20 insertions(+), 41 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index ab39d5d3a788..ad34d2f9c219 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -53,10 +53,6 @@ class HeldTaskCompletion( : mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry ]]] = mutable.Map() - val mergedListMap: mutable.Map[TaskFingerprint, mutable.Map[ - ((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), - TableEntry - ]] = mutable.Map() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -72,7 +68,7 @@ class HeldTaskCompletion( changed = noneChanged taskResultsPairs.foreach { case (t, resultsForTask, newResults) => - addCompletedTasksToMainTable(newResults.toList, groupMap, mergedListMap) + addCompletedTasksToMainTable(newResults.toList, groupMap) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } @@ -132,11 +128,7 @@ class HeldTaskCompletion( results: List[(TaskFingerprint, TableEntry)], groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry - ]]], - mergedListMap: mutable.Map[ - TaskFingerprint, - mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] - ] + ]]] ): Unit = { results.groupBy(_._1).foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) @@ -158,12 +150,9 @@ class HeldTaskCompletion( } ) - val groupListMap = mergedListMap.getOrElse( - fingerprint, - mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() - ) - val mergedGroups = newGroups ++ oldGroups - val mergedList = getListFromGroups(mergedGroups, groupListMap) + //val mergedGroups = newGroups ++ oldGroups + val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => k -> (v ++ oldGroups.getOrElse(k, List())) } + val mergedList = getListFromGroups(mergedGroups) resultTable.put(fingerprint, mergedList) groupMap.update(fingerprint, mergedGroups) @@ -213,36 +202,26 @@ class HeldTaskCompletion( .toList } - private def getListFromGroups( - groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], - groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] - ): List[TableEntry] = { - val mapped = groups.map { case (key, list) => - val tableEntry = groupListMap.getOrElse(key, null) + private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]): List[TableEntry] = { + val mapped = groups.map { case (_, list) => + val lenIdPathPairs = list.map(x => (x.path.length, x)) + val withMaxLength = (lenIdPathPairs.sortBy(_._1).reverse match { + case Nil => Nil + case h :: t => h :: t.takeWhile(y => y._1 == h._1) + }).map(_._2) - if (tableEntry != null) { - tableEntry + if (withMaxLength.length == 1) { + withMaxLength.head } else { - val lenIdPathPairs = list.map(x => (x.path.length, x)) - val withMaxLength = (lenIdPathPairs.sortBy(_._1).reverse match { - case Nil => Nil - case h :: t => h :: t.takeWhile(y => y._1 == h._1) - }).map(_._2) - - if (withMaxLength.length == 1) { - groupListMap.update(key, withMaxLength.head) - withMaxLength.head - } else { - val tableEntry = withMaxLength.minBy { x => - x.path - .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) - .mkString("-") - } - groupListMap.update(key, tableEntry) - tableEntry + withMaxLength.minBy { x => + x.path + .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) + .mkString("-") } } } mapped.toList } + + } From 214edfcec9a246a5cb7f93890cfb349b206798b1 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 25 Jan 2023 23:01:21 +0530 Subject: [PATCH 09/31] Removed sorting O(n^2) worst case and made it a linear search O(n) to find the max elements --- .../queryengine/HeldTaskCompletion.scala | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index ad34d2f9c219..9283f1ad3792 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -3,6 +3,7 @@ package io.joern.dataflowengineoss.queryengine import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} import scala.collection.mutable +import scala.collection.mutable.ListBuffer import scala.collection.parallel.CollectionConverters._ import scala.language.postfixOps @@ -204,8 +205,21 @@ class HeldTaskCompletion( private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]): List[TableEntry] = { val mapped = groups.map { case (_, list) => - val lenIdPathPairs = list.map(x => (x.path.length, x)) - val withMaxLength = (lenIdPathPairs.sortBy(_._1).reverse match { + val maxLenBuf = ListBuffer[(Int, TableEntry)]() + var maxLen = 0 + var maxLenIndex = 0 + + list.foreach(tableEntry => { + if (tableEntry.path.length > maxLen) { + maxLen = tableEntry.path.length + maxLenBuf.addOne( maxLen, tableEntry) + maxLenIndex = maxLenBuf.length - 1 + } else if (tableEntry.path.length == maxLen) { + maxLenBuf.addOne(maxLen, tableEntry) + } + }) + + val withMaxLength = (maxLenBuf.slice(maxLenIndex, maxLenBuf.length).toList match { case Nil => Nil case h :: t => h :: t.takeWhile(y => y._1 == h._1) }).map(_._2) From a252febcd150dea6570c66dc6da4f451080030a7 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Fri, 27 Jan 2023 18:28:24 +0530 Subject: [PATCH 10/31] Unique hash for each TableEntry --- .../queryengine/HeldTaskCompletion.scala | 23 ++++++++++++------- .../queryengine/TaskSolver.scala | 16 ++++++++++++- .../queryengine/package.scala | 2 +- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 9283f1ad3792..ea72c49eafa3 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -2,6 +2,7 @@ package io.joern.dataflowengineoss.queryengine import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} +import java.security.MessageDigest import scala.collection.mutable import scala.collection.mutable.ListBuffer import scala.collection.parallel.CollectionConverters._ @@ -115,7 +116,18 @@ class HeldTaskCompletion( .indexOf((parentTask.sink, parentTask.callSiteStack)) + 1 val initialPathOnlyUpToSink = initialPath.slice(0, stopIndex) val newPath = result.path ++ initialPathOnlyUpToSink - (parentTask, TableEntry(newPath)) + val md = MessageDigest.getInstance("SHA-1") + newPath + .foreach(x => (md.update(x.node.id.toByte), + x.callSiteStack.foreach(x => { + md.update(x.id().toByte) + }), + md.update(x.visible.hashCode().toByte), + md.update(x.isOutputArg.hashCode().toByte), + md.update(x.outEdgeLabel.hashCode.toByte) + )) + val hash = md.digest().toString + (parentTask, TableEntry(newPath, hash)) } .filter { case (_, tableEntry) => containsCycle(tableEntry) } } @@ -151,7 +163,6 @@ class HeldTaskCompletion( } ) - //val mergedGroups = newGroups ++ oldGroups val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => k -> (v ++ oldGroups.getOrElse(k, List())) } val mergedList = getListFromGroups(mergedGroups) @@ -194,9 +205,7 @@ class HeldTaskCompletion( withMaxLength.head } else { withMaxLength.minBy { x => - x.path - .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) - .mkString("-") + x.uniqueHash } } } @@ -228,9 +237,7 @@ class HeldTaskCompletion( withMaxLength.head } else { withMaxLength.minBy { x => - x.path - .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) - .mkString("-") + x.uniqueHash } } } diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/TaskSolver.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/TaskSolver.scala index 2fbe0aa3ff4b..91ac7e99f39f 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/TaskSolver.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/TaskSolver.scala @@ -5,6 +5,7 @@ import io.joern.dataflowengineoss.semanticsloader.Semantics import io.shiftleft.codepropertygraph.generated.nodes._ import io.shiftleft.semanticcpg.language.{toCfgNodeMethods, toExpressionMethods} +import java.security.MessageDigest import java.util.concurrent.Callable import scala.collection.mutable @@ -49,7 +50,20 @@ class TaskSolver(task: ReachableByTask, context: EngineContext, sources: Set[Cfg val parentTask = r.taskStack(i) val pathToSink = r.path.slice(0, r.path.map(_.node).indexOf(parentTask.sink)) val newPath = pathToSink :+ PathElement(parentTask.sink, parentTask.callSiteStack) - (parentTask, TableEntry(path = newPath)) + + val md = MessageDigest.getInstance("SHA-1") + newPath + .foreach(x => (md.update(x.node.id.toByte), + x.callSiteStack.foreach(x => { + md.update(x.id().toByte) + }), + md.update(x.visible.hashCode().toByte), + md.update(x.isOutputArg.hashCode().toByte), + md.update(x.outEdgeLabel.hashCode.toByte) + )) + val hash = md.digest().toString + + (parentTask, TableEntry(path = newPath, uniqueHash = hash)) }.toList } diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/package.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/package.scala index 249ac0f7ffac..8a5a06c21371 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/package.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/package.scala @@ -99,6 +99,6 @@ package object queryengine { } case class TaskSummary(tableEntries: Vector[(TaskFingerprint, TableEntry)], followupTasks: Vector[ReachableByTask]) - case class TableEntry(path: Vector[PathElement]) + case class TableEntry(path: Vector[PathElement], uniqueHash: String) } From 22f0cb125cabf596c3aacc2b0f97d2608d095b5a Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 16:56:39 +0530 Subject: [PATCH 11/31] Lazy computation of Sha1 hash --- .../queryengine/HeldTaskCompletion.scala | 37 ++++++++++++++----- .../queryengine/TaskSolver.scala | 14 +------ .../queryengine/package.scala | 2 +- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index ea72c49eafa3..2b08576e90ed 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -2,6 +2,7 @@ package io.joern.dataflowengineoss.queryengine import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} +import java.nio.charset.StandardCharsets import java.security.MessageDigest import scala.collection.mutable import scala.collection.mutable.ListBuffer @@ -39,7 +40,7 @@ class HeldTaskCompletion( * created, `changed` is set to true for the result's table entry and `resultsProductByTask` is updated. */ def completeHeldTasks(): Unit = { - + val tableEntryHash = mutable.HashMap[TableEntry, String]() deduplicateResultTable() val toProcess = heldTasks.distinct.sortBy(x => @@ -70,7 +71,7 @@ class HeldTaskCompletion( changed = noneChanged taskResultsPairs.foreach { case (t, resultsForTask, newResults) => - addCompletedTasksToMainTable(newResults.toList, groupMap) + addCompletedTasksToMainTable(newResults.toList, groupMap, tableEntryHash) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } @@ -127,7 +128,7 @@ class HeldTaskCompletion( md.update(x.outEdgeLabel.hashCode.toByte) )) val hash = md.digest().toString - (parentTask, TableEntry(newPath, hash)) + (parentTask, TableEntry(newPath)) } .filter { case (_, tableEntry) => containsCycle(tableEntry) } } @@ -140,8 +141,9 @@ class HeldTaskCompletion( private def addCompletedTasksToMainTable( results: List[(TaskFingerprint, TableEntry)], groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ - TableEntry - ]]] + TableEntry, + ]]], + tableEntryHash : mutable.HashMap[TableEntry, String] ): Unit = { results.groupBy(_._1).foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) @@ -164,7 +166,7 @@ class HeldTaskCompletion( ) val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => k -> (v ++ oldGroups.getOrElse(k, List())) } - val mergedList = getListFromGroups(mergedGroups) + val mergedList = getListFromGroups(mergedGroups, tableEntryHash) resultTable.put(fingerprint, mergedList) groupMap.update(fingerprint, mergedGroups) @@ -205,14 +207,17 @@ class HeldTaskCompletion( withMaxLength.head } else { withMaxLength.minBy { x => - x.uniqueHash + x.path + .map(x => (x.node.id, x.callSiteStack.map(_.id), x.visible, x.isOutputArg, x.outEdgeLabel).toString) + .mkString("-") } } } .toList } - private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]]): List[TableEntry] = { + private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], + tableEntryHash : mutable.HashMap[TableEntry, String]): List[TableEntry] = { val mapped = groups.map { case (_, list) => val maxLenBuf = ListBuffer[(Int, TableEntry)]() var maxLen = 0 @@ -237,7 +242,21 @@ class HeldTaskCompletion( withMaxLength.head } else { withMaxLength.minBy { x => - x.uniqueHash + val strForHash = tableEntryHash.getOrElse(x, ({ + val md = MessageDigest.getInstance("SHA-1") + val strForHash = x.path + .foreach(x => (md.update(x.node.id.toByte), + x.callSiteStack.foreach(x => { + md.update(x.id().toByte) + }), + md.update(x.visible.toString.getBytes(StandardCharsets.UTF_8)), + md.update(x.isOutputArg.toString.getBytes(StandardCharsets.UTF_8)), + md.update(x.outEdgeLabel.getBytes(StandardCharsets.UTF_8))) + ) + md.digest().toString + })) + tableEntryHash.update(x, strForHash) + strForHash } } } diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/TaskSolver.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/TaskSolver.scala index 91ac7e99f39f..ba96cc3ea606 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/TaskSolver.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/TaskSolver.scala @@ -51,19 +51,7 @@ class TaskSolver(task: ReachableByTask, context: EngineContext, sources: Set[Cfg val pathToSink = r.path.slice(0, r.path.map(_.node).indexOf(parentTask.sink)) val newPath = pathToSink :+ PathElement(parentTask.sink, parentTask.callSiteStack) - val md = MessageDigest.getInstance("SHA-1") - newPath - .foreach(x => (md.update(x.node.id.toByte), - x.callSiteStack.foreach(x => { - md.update(x.id().toByte) - }), - md.update(x.visible.hashCode().toByte), - md.update(x.isOutputArg.hashCode().toByte), - md.update(x.outEdgeLabel.hashCode.toByte) - )) - val hash = md.digest().toString - - (parentTask, TableEntry(path = newPath, uniqueHash = hash)) + (parentTask, TableEntry(path = newPath)) }.toList } diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/package.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/package.scala index 8a5a06c21371..249ac0f7ffac 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/package.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/package.scala @@ -99,6 +99,6 @@ package object queryengine { } case class TaskSummary(tableEntries: Vector[(TaskFingerprint, TableEntry)], followupTasks: Vector[ReachableByTask]) - case class TableEntry(path: Vector[PathElement], uniqueHash: String) + case class TableEntry(path: Vector[PathElement]) } From 9dfadd79ae1001a320b18acc8fc90960626b7a67 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 17:14:58 +0530 Subject: [PATCH 12/31] Reintroduced caching of table entry hashes --- .../queryengine/HeldTaskCompletion.scala | 49 ++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 2b08576e90ed..285141302be7 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -56,6 +56,10 @@ class HeldTaskCompletion( : mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry ]]] = mutable.Map() + val mergedListMap: mutable.Map[TaskFingerprint, mutable.Map[ + ((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), + TableEntry + ]] = mutable.Map() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -71,7 +75,7 @@ class HeldTaskCompletion( changed = noneChanged taskResultsPairs.foreach { case (t, resultsForTask, newResults) => - addCompletedTasksToMainTable(newResults.toList, groupMap, tableEntryHash) + addCompletedTasksToMainTable(newResults.toList, groupMap, tableEntryHash, mergedListMap) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } @@ -143,7 +147,11 @@ class HeldTaskCompletion( groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry, ]]], - tableEntryHash : mutable.HashMap[TableEntry, String] + tableEntryHash : mutable.HashMap[TableEntry, String], + mergedListMap: mutable.Map[ + TaskFingerprint, + mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + ] ): Unit = { results.groupBy(_._1).foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) @@ -165,11 +173,16 @@ class HeldTaskCompletion( } ) + val groupListMap = mergedListMap.getOrElse( + fingerprint, + mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() + ) val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => k -> (v ++ oldGroups.getOrElse(k, List())) } - val mergedList = getListFromGroups(mergedGroups, tableEntryHash) + val mergedList = getListFromGroups(mergedGroups, tableEntryHash, groupListMap) resultTable.put(fingerprint, mergedList) groupMap.update(fingerprint, mergedGroups) + mergedListMap.update(fingerprint, groupListMap) } } @@ -217,19 +230,27 @@ class HeldTaskCompletion( } private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], - tableEntryHash : mutable.HashMap[TableEntry, String]): List[TableEntry] = { - val mapped = groups.map { case (_, list) => + tableEntryHash : mutable.HashMap[TableEntry, String], + groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + ): List[TableEntry] = { + val mapped = groups.map { case (key, list) => + val tableEntry = groupListMap.getOrElse(key, null) + //TODO extra table entries yet to be accounted for + + if (tableEntry != null) { + tableEntry + } else { val maxLenBuf = ListBuffer[(Int, TableEntry)]() var maxLen = 0 var maxLenIndex = 0 - list.foreach(tableEntry => { - if (tableEntry.path.length > maxLen) { - maxLen = tableEntry.path.length - maxLenBuf.addOne( maxLen, tableEntry) + list.foreach(t => { + if (t.path.length > maxLen) { + maxLen = t.path.length + maxLenBuf.addOne( maxLen, t) maxLenIndex = maxLenBuf.length - 1 - } else if (tableEntry.path.length == maxLen) { - maxLenBuf.addOne(maxLen, tableEntry) + } else if (t.path.length == maxLen) { + maxLenBuf.addOne(maxLen, t) } }) @@ -239,9 +260,10 @@ class HeldTaskCompletion( }).map(_._2) if (withMaxLength.length == 1) { + groupListMap.update(key, withMaxLength.head) withMaxLength.head } else { - withMaxLength.minBy { x => + val tableEntry = withMaxLength.minBy { x => val strForHash = tableEntryHash.getOrElse(x, ({ val md = MessageDigest.getInstance("SHA-1") val strForHash = x.path @@ -258,6 +280,9 @@ class HeldTaskCompletion( tableEntryHash.update(x, strForHash) strForHash } + groupListMap.update(key, tableEntry) + tableEntry + } } } mapped.toList From 16323fd162bfa0fab65d5e70fc47c7a88b460ed3 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 17:25:01 +0530 Subject: [PATCH 13/31] Variable renaming --- .../queryengine/HeldTaskCompletion.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 285141302be7..3ddd9377275a 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -264,9 +264,9 @@ class HeldTaskCompletion( withMaxLength.head } else { val tableEntry = withMaxLength.minBy { x => - val strForHash = tableEntryHash.getOrElse(x, ({ + val sha1Hash = tableEntryHash.getOrElse(x, ({ val md = MessageDigest.getInstance("SHA-1") - val strForHash = x.path + x.path .foreach(x => (md.update(x.node.id.toByte), x.callSiteStack.foreach(x => { md.update(x.id().toByte) @@ -277,8 +277,8 @@ class HeldTaskCompletion( ) md.digest().toString })) - tableEntryHash.update(x, strForHash) - strForHash + tableEntryHash.update(x, sha1Hash) + sha1Hash } groupListMap.update(key, tableEntry) tableEntry From 34372f44a23cf79353a8f171e202c91d89a43b16 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 17:25:45 +0530 Subject: [PATCH 14/31] Formatted the code --- .../queryengine/HeldTaskCompletion.scala | 119 ++++++++++-------- 1 file changed, 64 insertions(+), 55 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 3ddd9377275a..bce786404731 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -121,16 +121,19 @@ class HeldTaskCompletion( .indexOf((parentTask.sink, parentTask.callSiteStack)) + 1 val initialPathOnlyUpToSink = initialPath.slice(0, stopIndex) val newPath = result.path ++ initialPathOnlyUpToSink - val md = MessageDigest.getInstance("SHA-1") + val md = MessageDigest.getInstance("SHA-1") newPath - .foreach(x => (md.update(x.node.id.toByte), - x.callSiteStack.foreach(x => { - md.update(x.id().toByte) - }), - md.update(x.visible.hashCode().toByte), - md.update(x.isOutputArg.hashCode().toByte), - md.update(x.outEdgeLabel.hashCode.toByte) - )) + .foreach(x => + ( + md.update(x.node.id.toByte), + x.callSiteStack.foreach(x => { + md.update(x.id().toByte) + }), + md.update(x.visible.hashCode().toByte), + md.update(x.isOutputArg.hashCode().toByte), + md.update(x.outEdgeLabel.hashCode.toByte) + ) + ) val hash = md.digest().toString (parentTask, TableEntry(newPath)) } @@ -147,7 +150,7 @@ class HeldTaskCompletion( groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry, ]]], - tableEntryHash : mutable.HashMap[TableEntry, String], + tableEntryHash: mutable.HashMap[TableEntry, String], mergedListMap: mutable.Map[ TaskFingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] @@ -229,64 +232,70 @@ class HeldTaskCompletion( .toList } - private def getListFromGroups(groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], - tableEntryHash : mutable.HashMap[TableEntry, String], - groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] - ): List[TableEntry] = { + private def getListFromGroups( + groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], + tableEntryHash: mutable.HashMap[TableEntry, String], + groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + ): List[TableEntry] = { val mapped = groups.map { case (key, list) => val tableEntry = groupListMap.getOrElse(key, null) - //TODO extra table entries yet to be accounted for + // TODO extra table entries yet to be accounted for if (tableEntry != null) { tableEntry } else { - val maxLenBuf = ListBuffer[(Int, TableEntry)]() - var maxLen = 0 - var maxLenIndex = 0 + val maxLenBuf = ListBuffer[(Int, TableEntry)]() + var maxLen = 0 + var maxLenIndex = 0 - list.foreach(t => { - if (t.path.length > maxLen) { - maxLen = t.path.length - maxLenBuf.addOne( maxLen, t) - maxLenIndex = maxLenBuf.length - 1 - } else if (t.path.length == maxLen) { - maxLenBuf.addOne(maxLen, t) - } - }) + list.foreach(t => { + if (t.path.length > maxLen) { + maxLen = t.path.length + maxLenBuf.addOne(maxLen, t) + maxLenIndex = maxLenBuf.length - 1 + } else if (t.path.length == maxLen) { + maxLenBuf.addOne(maxLen, t) + } + }) - val withMaxLength = (maxLenBuf.slice(maxLenIndex, maxLenBuf.length).toList match { - case Nil => Nil - case h :: t => h :: t.takeWhile(y => y._1 == h._1) - }).map(_._2) + val withMaxLength = (maxLenBuf.slice(maxLenIndex, maxLenBuf.length).toList match { + case Nil => Nil + case h :: t => h :: t.takeWhile(y => y._1 == h._1) + }).map(_._2) - if (withMaxLength.length == 1) { - groupListMap.update(key, withMaxLength.head) - withMaxLength.head - } else { - val tableEntry = withMaxLength.minBy { x => - val sha1Hash = tableEntryHash.getOrElse(x, ({ - val md = MessageDigest.getInstance("SHA-1") - x.path - .foreach(x => (md.update(x.node.id.toByte), - x.callSiteStack.foreach(x => { - md.update(x.id().toByte) - }), - md.update(x.visible.toString.getBytes(StandardCharsets.UTF_8)), - md.update(x.isOutputArg.toString.getBytes(StandardCharsets.UTF_8)), - md.update(x.outEdgeLabel.getBytes(StandardCharsets.UTF_8))) - ) - md.digest().toString - })) - tableEntryHash.update(x, sha1Hash) - sha1Hash + if (withMaxLength.length == 1) { + groupListMap.update(key, withMaxLength.head) + withMaxLength.head + } else { + val tableEntry = withMaxLength.minBy { x => + val sha1Hash = tableEntryHash.getOrElse( + x, + ({ + val md = MessageDigest.getInstance("SHA-1") + x.path + .foreach(x => + ( + md.update(x.node.id.toByte), + x.callSiteStack.foreach(x => { + md.update(x.id().toByte) + }), + md.update(x.visible.toString.getBytes(StandardCharsets.UTF_8)), + md.update(x.isOutputArg.toString.getBytes(StandardCharsets.UTF_8)), + md.update(x.outEdgeLabel.getBytes(StandardCharsets.UTF_8)) + ) + ) + md.digest().toString + }) + ) + tableEntryHash.update(x, sha1Hash) + sha1Hash + } + groupListMap.update(key, tableEntry) + tableEntry } - groupListMap.update(key, tableEntry) - tableEntry - } } } mapped.toList } - } From 04cc73d22727e22d332afd2e5950cc4e2f3081fc Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 17:29:34 +0530 Subject: [PATCH 15/31] Moved hash computation to a different function --- .../queryengine/HeldTaskCompletion.scala | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index bce786404731..2ff06bba7e16 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -268,27 +268,7 @@ class HeldTaskCompletion( withMaxLength.head } else { val tableEntry = withMaxLength.minBy { x => - val sha1Hash = tableEntryHash.getOrElse( - x, - ({ - val md = MessageDigest.getInstance("SHA-1") - x.path - .foreach(x => - ( - md.update(x.node.id.toByte), - x.callSiteStack.foreach(x => { - md.update(x.id().toByte) - }), - md.update(x.visible.toString.getBytes(StandardCharsets.UTF_8)), - md.update(x.isOutputArg.toString.getBytes(StandardCharsets.UTF_8)), - md.update(x.outEdgeLabel.getBytes(StandardCharsets.UTF_8)) - ) - ) - md.digest().toString - }) - ) - tableEntryHash.update(x, sha1Hash) - sha1Hash + getSHA1Hash(x, tableEntryHash) } groupListMap.update(key, tableEntry) tableEntry @@ -298,4 +278,26 @@ class HeldTaskCompletion( mapped.toList } + private def getSHA1Hash(tableEntry: TableEntry, tableEntryHash: mutable.HashMap[TableEntry, String]): String = { + tableEntryHash.getOrElse( + tableEntry, + ({ + val md = MessageDigest.getInstance("SHA-1") + tableEntry.path + .foreach(x => + ( + md.update(x.node.id.toByte), + x.callSiteStack.foreach(x => { + md.update(x.id().toByte) + }), + md.update(x.visible.toString.getBytes(StandardCharsets.UTF_8)), + md.update(x.isOutputArg.toString.getBytes(StandardCharsets.UTF_8)), + md.update(x.outEdgeLabel.getBytes(StandardCharsets.UTF_8)) + ) + ) + md.digest().toString + }) + ) + } + } From f1ef6999b141cee802984ef308d65506e5dae0e7 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 17:56:28 +0530 Subject: [PATCH 16/31] Merging code improvement --- .../dataflowengineoss/queryengine/HeldTaskCompletion.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 2ff06bba7e16..110e91de0df0 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -180,7 +180,11 @@ class HeldTaskCompletion( fingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() ) - val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => k -> (v ++ oldGroups.getOrElse(k, List())) } + val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => k -> ({ + val old = oldGroups.getOrElse(k, List()) + //println(s"Old list length: ${old.length} New list length: ${v.length}") + v ++ old + }) } val mergedList = getListFromGroups(mergedGroups, tableEntryHash, groupListMap) resultTable.put(fingerprint, mergedList) From e5afde136eb820ab77aac99bd0804a24004b16cd Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 18:08:24 +0530 Subject: [PATCH 17/31] Filter on max length --- .../queryengine/HeldTaskCompletion.scala | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 110e91de0df0..01630c584549 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -40,7 +40,6 @@ class HeldTaskCompletion( * created, `changed` is set to true for the result's table entry and `resultsProductByTask` is updated. */ def completeHeldTasks(): Unit = { - val tableEntryHash = mutable.HashMap[TableEntry, String]() deduplicateResultTable() val toProcess = heldTasks.distinct.sortBy(x => @@ -60,6 +59,7 @@ class HeldTaskCompletion( ((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry ]] = mutable.Map() + val tableEntryHash = mutable.HashMap[TableEntry, String]() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -180,12 +180,17 @@ class HeldTaskCompletion( fingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() ) - val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => k -> ({ - val old = oldGroups.getOrElse(k, List()) - //println(s"Old list length: ${old.length} New list length: ${v.length}") - v ++ old - }) } - val mergedList = getListFromGroups(mergedGroups, tableEntryHash, groupListMap) + val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => + k -> ({ + val old = oldGroups.getOrElse(k, List()) + val maxLen = groupListMap.get(k) match { + case Some(tableEntry: TableEntry) => tableEntry.path.length + case None => 0 + } + old ++ v.filterNot(x => { x.path.length < maxLen }) + }) + } + val mergedList = getListFromGroups(mergedGroups, tableEntryHash, groupListMap) resultTable.put(fingerprint, mergedList) groupMap.update(fingerprint, mergedGroups) From ee78f6bf5b535023dc618cd812c2d36017ec103c Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 18:33:54 +0530 Subject: [PATCH 18/31] Filtering while appendin new list --- .../queryengine/HeldTaskCompletion.scala | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 01630c584549..0a5e9b3fb58f 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -181,14 +181,27 @@ class HeldTaskCompletion( mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() ) val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => - k -> ({ + k -> { val old = oldGroups.getOrElse(k, List()) val maxLen = groupListMap.get(k) match { case Some(tableEntry: TableEntry) => tableEntry.path.length case None => 0 } - old ++ v.filterNot(x => { x.path.length < maxLen }) - }) + val gtOrEqualMax = v.filter(x => x.path.length >= maxLen) + val gtMax = gtOrEqualMax.filter(x => x.path.length > maxLen) + + if (gtMax.length > 0) { + // new list contains elements with paths exceeding the max. retain new list elements only + gtMax + } else if (gtOrEqualMax == 0) { + // new list contains all elements with paths less than the max. retain old list elements only + old + } else { + // new list contains all elements with paths less than or equal to the max but not exeeding it. + // append new list elements that are equal to max + old ++ gtOrEqualMax.filter(x => x.path.length == maxLen) + } + } } val mergedList = getListFromGroups(mergedGroups, tableEntryHash, groupListMap) From 9ebba8f602b1140770a77094ee3c94ff0ea514ba Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 19:39:41 +0530 Subject: [PATCH 19/31] Accounted for new max entries --- .../queryengine/HeldTaskCompletion.scala | 57 ++++++++----------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 0a5e9b3fb58f..e3702c916ecb 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -5,7 +5,6 @@ import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} import java.nio.charset.StandardCharsets import java.security.MessageDigest import scala.collection.mutable -import scala.collection.mutable.ListBuffer import scala.collection.parallel.CollectionConverters._ import scala.language.postfixOps @@ -192,13 +191,22 @@ class HeldTaskCompletion( if (gtMax.length > 0) { // new list contains elements with paths exceeding the max. retain new list elements only - gtMax + // that have max length + groupListMap.remove(k) + var newMaxLen = maxLen + gtMax.foreach(x => { + if (x.path.length > newMaxLen) { + newMaxLen = x.path.length + } + }) + gtMax.filter(x => x.path.length == newMaxLen) } else if (gtOrEqualMax == 0) { // new list contains all elements with paths less than the max. retain old list elements only old } else { - // new list contains all elements with paths less than or equal to the max but not exeeding it. + // new list contains all elements with paths less than or equal to the max but not exceeding it. // append new list elements that are equal to max + groupListMap.remove(k) old ++ gtOrEqualMax.filter(x => x.path.length == maxLen) } } @@ -261,35 +269,15 @@ class HeldTaskCompletion( ): List[TableEntry] = { val mapped = groups.map { case (key, list) => val tableEntry = groupListMap.getOrElse(key, null) - // TODO extra table entries yet to be accounted for - if (tableEntry != null) { tableEntry } else { - val maxLenBuf = ListBuffer[(Int, TableEntry)]() - var maxLen = 0 - var maxLenIndex = 0 - - list.foreach(t => { - if (t.path.length > maxLen) { - maxLen = t.path.length - maxLenBuf.addOne(maxLen, t) - maxLenIndex = maxLenBuf.length - 1 - } else if (t.path.length == maxLen) { - maxLenBuf.addOne(maxLen, t) - } - }) - - val withMaxLength = (maxLenBuf.slice(maxLenIndex, maxLenBuf.length).toList match { - case Nil => Nil - case h :: t => h :: t.takeWhile(y => y._1 == h._1) - }).map(_._2) - - if (withMaxLength.length == 1) { - groupListMap.update(key, withMaxLength.head) - withMaxLength.head + if (list.length == 1) { + groupListMap.update(key, list.head) + list.head } else { - val tableEntry = withMaxLength.minBy { x => + // println(s"Computing hashes of ${list.length} elements") + val tableEntry = list.minBy { x => getSHA1Hash(x, tableEntryHash) } groupListMap.update(key, tableEntry) @@ -301,10 +289,11 @@ class HeldTaskCompletion( } private def getSHA1Hash(tableEntry: TableEntry, tableEntryHash: mutable.HashMap[TableEntry, String]): String = { - tableEntryHash.getOrElse( - tableEntry, - ({ - val md = MessageDigest.getInstance("SHA-1") + val sha1Hash = tableEntryHash.get(tableEntry) match { + case Some(sha1Hash) => + sha1Hash + case None => + val md = MessageDigest.getInstance("MD5") tableEntry.path .foreach(x => ( @@ -318,8 +307,8 @@ class HeldTaskCompletion( ) ) md.digest().toString - }) - ) + } + sha1Hash } } From 83dc73fa4bde9d29266e85fd0242f554d7e43d62 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 22:39:03 +0530 Subject: [PATCH 20/31] Removed SHA/MD5 usage since it is expensive --- .../queryengine/HeldTaskCompletion.scala | 91 ++++++++----------- 1 file changed, 37 insertions(+), 54 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index e3702c916ecb..6aef679af81f 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -2,8 +2,6 @@ package io.joern.dataflowengineoss.queryengine import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} -import java.nio.charset.StandardCharsets -import java.security.MessageDigest import scala.collection.mutable import scala.collection.parallel.CollectionConverters._ import scala.language.postfixOps @@ -58,7 +56,6 @@ class HeldTaskCompletion( ((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry ]] = mutable.Map() - val tableEntryHash = mutable.HashMap[TableEntry, String]() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -74,7 +71,7 @@ class HeldTaskCompletion( changed = noneChanged taskResultsPairs.foreach { case (t, resultsForTask, newResults) => - addCompletedTasksToMainTable(newResults.toList, groupMap, tableEntryHash, mergedListMap) + addCompletedTasksToMainTable(newResults.toList, groupMap, mergedListMap) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } @@ -120,20 +117,6 @@ class HeldTaskCompletion( .indexOf((parentTask.sink, parentTask.callSiteStack)) + 1 val initialPathOnlyUpToSink = initialPath.slice(0, stopIndex) val newPath = result.path ++ initialPathOnlyUpToSink - val md = MessageDigest.getInstance("SHA-1") - newPath - .foreach(x => - ( - md.update(x.node.id.toByte), - x.callSiteStack.foreach(x => { - md.update(x.id().toByte) - }), - md.update(x.visible.hashCode().toByte), - md.update(x.isOutputArg.hashCode().toByte), - md.update(x.outEdgeLabel.hashCode.toByte) - ) - ) - val hash = md.digest().toString (parentTask, TableEntry(newPath)) } .filter { case (_, tableEntry) => containsCycle(tableEntry) } @@ -149,7 +132,6 @@ class HeldTaskCompletion( groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry, ]]], - tableEntryHash: mutable.HashMap[TableEntry, String], mergedListMap: mutable.Map[ TaskFingerprint, mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] @@ -199,7 +181,8 @@ class HeldTaskCompletion( newMaxLen = x.path.length } }) - gtMax.filter(x => x.path.length == newMaxLen) + val element = gtMax.filter(x => x.path.length == newMaxLen).sortWith( compareTableEntries ).head + List(element) } else if (gtOrEqualMax == 0) { // new list contains all elements with paths less than the max. retain old list elements only old @@ -207,11 +190,12 @@ class HeldTaskCompletion( // new list contains all elements with paths less than or equal to the max but not exceeding it. // append new list elements that are equal to max groupListMap.remove(k) - old ++ gtOrEqualMax.filter(x => x.path.length == maxLen) + val element = (old ++ gtOrEqualMax.filter(x => x.path.length == maxLen)).sortWith( compareTableEntries ).head + List(element) } } } - val mergedList = getListFromGroups(mergedGroups, tableEntryHash, groupListMap) + val mergedList = getListFromGroups(mergedGroups, groupListMap) resultTable.put(fingerprint, mergedList) groupMap.update(fingerprint, mergedGroups) @@ -264,7 +248,6 @@ class HeldTaskCompletion( private def getListFromGroups( groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], - tableEntryHash: mutable.HashMap[TableEntry, String], groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] ): List[TableEntry] = { val mapped = groups.map { case (key, list) => @@ -272,43 +255,43 @@ class HeldTaskCompletion( if (tableEntry != null) { tableEntry } else { - if (list.length == 1) { - groupListMap.update(key, list.head) - list.head - } else { - // println(s"Computing hashes of ${list.length} elements") - val tableEntry = list.minBy { x => - getSHA1Hash(x, tableEntryHash) - } - groupListMap.update(key, tableEntry) - tableEntry - } + list.head } } mapped.toList } - private def getSHA1Hash(tableEntry: TableEntry, tableEntryHash: mutable.HashMap[TableEntry, String]): String = { - val sha1Hash = tableEntryHash.get(tableEntry) match { - case Some(sha1Hash) => - sha1Hash - case None => - val md = MessageDigest.getInstance("MD5") - tableEntry.path - .foreach(x => - ( - md.update(x.node.id.toByte), - x.callSiteStack.foreach(x => { - md.update(x.id().toByte) - }), - md.update(x.visible.toString.getBytes(StandardCharsets.UTF_8)), - md.update(x.isOutputArg.toString.getBytes(StandardCharsets.UTF_8)), - md.update(x.outEdgeLabel.getBytes(StandardCharsets.UTF_8)) - ) - ) - md.digest().toString + private def compareTableEntries( entry1: TableEntry, entry2: TableEntry): Boolean = { + if(entry1.path.length != entry2.path.length){ + return entry1.path.length < entry2.path.length } - sha1Hash + + val paths = entry1.path zip entry2.path + + paths.foreach( x =>{ + val left = x._1 + val right = x._2 + + if( left.callSiteStack.length != right.callSiteStack.length){ + return left.callSiteStack.length < right.callSiteStack.length + } + + if( left.node.id() != right.node.id()){ + return left.node.id() < right.node.id() + } + + if( left.isOutputArg != right.isOutputArg){ + return left.isOutputArg + } + + if(left.visible != right.visible){ + return left.visible + } + }) + + //TODO remove this + println("Returned default false because the comparison was not god enoough") + false } } From d4eba2652febc0cacd667e170f55196073e8766a Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Mon, 30 Jan 2023 23:07:50 +0530 Subject: [PATCH 21/31] Used minBy() instead of sorting --- .../queryengine/HeldTaskCompletion.scala | 42 +++++-------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 6aef679af81f..ede55a3c5ae7 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -181,7 +181,7 @@ class HeldTaskCompletion( newMaxLen = x.path.length } }) - val element = gtMax.filter(x => x.path.length == newMaxLen).sortWith( compareTableEntries ).head + val element = gtMax.filter(x => x.path.length == newMaxLen).minBy(computePriority) List(element) } else if (gtOrEqualMax == 0) { // new list contains all elements with paths less than the max. retain old list elements only @@ -190,7 +190,7 @@ class HeldTaskCompletion( // new list contains all elements with paths less than or equal to the max but not exceeding it. // append new list elements that are equal to max groupListMap.remove(k) - val element = (old ++ gtOrEqualMax.filter(x => x.path.length == maxLen)).sortWith( compareTableEntries ).head + val element = (old ++ gtOrEqualMax.filter(x => x.path.length == maxLen)).minBy(computePriority) List(element) } } @@ -261,37 +261,17 @@ class HeldTaskCompletion( mapped.toList } - private def compareTableEntries( entry1: TableEntry, entry2: TableEntry): Boolean = { - if(entry1.path.length != entry2.path.length){ - return entry1.path.length < entry2.path.length - } - - val paths = entry1.path zip entry2.path - - paths.foreach( x =>{ - val left = x._1 - val right = x._2 + private def computePriority(entry: TableEntry): BigInt = { + var priority : BigInt = entry.path.length + val multiplier: BigInt = 131072 //2^17 - if( left.callSiteStack.length != right.callSiteStack.length){ - return left.callSiteStack.length < right.callSiteStack.length - } - - if( left.node.id() != right.node.id()){ - return left.node.id() < right.node.id() - } - - if( left.isOutputArg != right.isOutputArg){ - return left.isOutputArg - } - - if(left.visible != right.visible){ - return left.visible - } + entry.path.foreach( element => { + priority = priority + element.callSiteStack.length*multiplier + priority = priority + element.node.id()*multiplier*64 + priority = priority + element.isOutputArg.hashCode()*multiplier*64 + priority = priority + element.visible.hashCode()*multiplier*64 }) - - //TODO remove this - println("Returned default false because the comparison was not god enoough") - false + priority } } From b56b25fbe2ccd0f4489f8ad790d760a87bbaaa21 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Tue, 31 Jan 2023 13:09:08 +0530 Subject: [PATCH 22/31] Updated group list map that was missed earlier --- .../joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index ede55a3c5ae7..d7fa8e065e40 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -255,6 +255,7 @@ class HeldTaskCompletion( if (tableEntry != null) { tableEntry } else { + groupListMap.update(key, list.head) list.head } } From a86622719de431182099bd6de7be32415ed6d591 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Tue, 31 Jan 2023 18:22:45 +0530 Subject: [PATCH 23/31] 1. Converted merge list to hash map 2. Used parallelization --- .../queryengine/HeldTaskCompletion.scala | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index d7fa8e065e40..9eeaf4c6cd22 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -52,7 +52,7 @@ class HeldTaskCompletion( : mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry ]]] = mutable.Map() - val mergedListMap: mutable.Map[TaskFingerprint, mutable.Map[ + val mergedListMap: mutable.Map[TaskFingerprint, mutable.HashMap[ ((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry ]] = mutable.Map() @@ -134,10 +134,10 @@ class HeldTaskCompletion( ]]], mergedListMap: mutable.Map[ TaskFingerprint, - mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + mutable.HashMap[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] ] ): Unit = { - results.groupBy(_._1).foreach { case (fingerprint, resultList) => + results.groupBy(_._1).par.foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) val newGroups = entries .groupBy { result => @@ -159,12 +159,13 @@ class HeldTaskCompletion( val groupListMap = mergedListMap.getOrElse( fingerprint, - mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() + mutable.HashMap[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() ) - val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => + val mergedGroups = oldGroups ++ newGroups.par.map { case (k, v) => k -> { val old = oldGroups.getOrElse(k, List()) - val maxLen = groupListMap.get(k) match { + val value = synchronized(groupListMap.get(k)) + val maxLen = value match { case Some(tableEntry: TableEntry) => tableEntry.path.length case None => 0 } @@ -174,14 +175,14 @@ class HeldTaskCompletion( if (gtMax.length > 0) { // new list contains elements with paths exceeding the max. retain new list elements only // that have max length - groupListMap.remove(k) + synchronized(groupListMap.remove(k)) var newMaxLen = maxLen gtMax.foreach(x => { if (x.path.length > newMaxLen) { newMaxLen = x.path.length } }) - val element = gtMax.filter(x => x.path.length == newMaxLen).minBy(computePriority) + val element = gtMax.filter(x => x.path.length == newMaxLen).par.minBy(computePriority) List(element) } else if (gtOrEqualMax == 0) { // new list contains all elements with paths less than the max. retain old list elements only @@ -189,8 +190,8 @@ class HeldTaskCompletion( } else { // new list contains all elements with paths less than or equal to the max but not exceeding it. // append new list elements that are equal to max - groupListMap.remove(k) - val element = (old ++ gtOrEqualMax.filter(x => x.path.length == maxLen)).minBy(computePriority) + synchronized(groupListMap.remove(k)) + val element = (old ++ gtOrEqualMax.par.filter(x => x.path.length == maxLen)).par.minBy(computePriority) List(element) } } @@ -248,16 +249,11 @@ class HeldTaskCompletion( private def getListFromGroups( groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], - groupListMap: mutable.Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + groupListMap: mutable.HashMap[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] ): List[TableEntry] = { val mapped = groups.map { case (key, list) => - val tableEntry = groupListMap.getOrElse(key, null) - if (tableEntry != null) { - tableEntry - } else { groupListMap.update(key, list.head) list.head - } } mapped.toList } From 125e30b366032b8fe4e1365551a6ebaf61155e39 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Tue, 31 Jan 2023 19:06:43 +0530 Subject: [PATCH 24/31] Better sync of the critical section --- .../queryengine/HeldTaskCompletion.scala | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 9eeaf4c6cd22..79feb5906004 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -163,7 +163,7 @@ class HeldTaskCompletion( ) val mergedGroups = oldGroups ++ newGroups.par.map { case (k, v) => k -> { - val old = oldGroups.getOrElse(k, List()) + val old = oldGroups.getOrElse(k, List()) val value = synchronized(groupListMap.get(k)) val maxLen = value match { case Some(tableEntry: TableEntry) => tableEntry.path.length @@ -198,9 +198,9 @@ class HeldTaskCompletion( } val mergedList = getListFromGroups(mergedGroups, groupListMap) - resultTable.put(fingerprint, mergedList) - groupMap.update(fingerprint, mergedGroups) - mergedListMap.update(fingerprint, groupListMap) + synchronized(resultTable.put(fingerprint, mergedList)) + synchronized(groupMap.update(fingerprint, mergedGroups)) + synchronized(mergedListMap.update(fingerprint, groupListMap)) } } @@ -252,21 +252,21 @@ class HeldTaskCompletion( groupListMap: mutable.HashMap[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] ): List[TableEntry] = { val mapped = groups.map { case (key, list) => - groupListMap.update(key, list.head) - list.head + synchronized(groupListMap.update(key, list.head)) + list.head } mapped.toList } private def computePriority(entry: TableEntry): BigInt = { - var priority : BigInt = entry.path.length - val multiplier: BigInt = 131072 //2^17 + var priority: BigInt = entry.path.length + val multiplier: BigInt = 131072 // 2^17 - entry.path.foreach( element => { - priority = priority + element.callSiteStack.length*multiplier - priority = priority + element.node.id()*multiplier*64 - priority = priority + element.isOutputArg.hashCode()*multiplier*64 - priority = priority + element.visible.hashCode()*multiplier*64 + entry.path.foreach(element => { + priority = priority + element.callSiteStack.length * multiplier + priority = priority + element.node.id() * multiplier * 64 + priority = priority + element.isOutputArg.hashCode() * multiplier * 64 + priority = priority + element.visible.hashCode() * multiplier * 64 }) priority } From f61436667de4b847fb9fcb5466208d5eb5b45f79 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 1 Feb 2023 17:51:06 +0530 Subject: [PATCH 25/31] Removed merge list map since it is no longer needed --- .../queryengine/HeldTaskCompletion.scala | 43 ++++++------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 79feb5906004..e2fd7240da03 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -52,10 +52,6 @@ class HeldTaskCompletion( : mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry ]]] = mutable.Map() - val mergedListMap: mutable.Map[TaskFingerprint, mutable.HashMap[ - ((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), - TableEntry - ]] = mutable.Map() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -71,7 +67,7 @@ class HeldTaskCompletion( changed = noneChanged taskResultsPairs.foreach { case (t, resultsForTask, newResults) => - addCompletedTasksToMainTable(newResults.toList, groupMap, mergedListMap) + addCompletedTasksToMainTable(newResults.toList, groupMap) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } @@ -130,12 +126,8 @@ class HeldTaskCompletion( private def addCompletedTasksToMainTable( results: List[(TaskFingerprint, TableEntry)], groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ - TableEntry, - ]]], - mergedListMap: mutable.Map[ - TaskFingerprint, - mutable.HashMap[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] - ] + TableEntry + ]]] ): Unit = { results.groupBy(_._1).par.foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) @@ -157,25 +149,19 @@ class HeldTaskCompletion( } ) - val groupListMap = mergedListMap.getOrElse( - fingerprint, - mutable.HashMap[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry]() - ) val mergedGroups = oldGroups ++ newGroups.par.map { case (k, v) => k -> { - val old = oldGroups.getOrElse(k, List()) - val value = synchronized(groupListMap.get(k)) - val maxLen = value match { - case Some(tableEntry: TableEntry) => tableEntry.path.length - case None => 0 - } + val old = oldGroups.getOrElse(k, List()) + val maxLen = if (old.length > 0) { + old.head.path.length + } else { 0 } + val gtOrEqualMax = v.filter(x => x.path.length >= maxLen) val gtMax = gtOrEqualMax.filter(x => x.path.length > maxLen) if (gtMax.length > 0) { // new list contains elements with paths exceeding the max. retain new list elements only // that have max length - synchronized(groupListMap.remove(k)) var newMaxLen = maxLen gtMax.foreach(x => { if (x.path.length > newMaxLen) { @@ -190,17 +176,16 @@ class HeldTaskCompletion( } else { // new list contains all elements with paths less than or equal to the max but not exceeding it. // append new list elements that are equal to max - synchronized(groupListMap.remove(k)) val element = (old ++ gtOrEqualMax.par.filter(x => x.path.length == maxLen)).par.minBy(computePriority) List(element) } } } - val mergedList = getListFromGroups(mergedGroups, groupListMap) - + val mergedList = mergedGroups.map { case (_, list) => + list.head + }.toList synchronized(resultTable.put(fingerprint, mergedList)) synchronized(groupMap.update(fingerprint, mergedGroups)) - synchronized(mergedListMap.update(fingerprint, groupListMap)) } } @@ -248,11 +233,9 @@ class HeldTaskCompletion( } private def getListFromGroups( - groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]], - groupListMap: mutable.HashMap[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), TableEntry] + groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]] ): List[TableEntry] = { - val mapped = groups.map { case (key, list) => - synchronized(groupListMap.update(key, list.head)) + val mapped = groups.map { case (_, list) => list.head } mapped.toList From 328c287331ef78984c8c5e16f5b90b78e49e5100 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 1 Feb 2023 17:59:49 +0530 Subject: [PATCH 26/31] Introduced RW lock for proper protection --- .../queryengine/HeldTaskCompletion.scala | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index e2fd7240da03..ff45778cf59b 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -2,6 +2,7 @@ package io.joern.dataflowengineoss.queryengine import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode} +import java.util.concurrent.locks.ReentrantReadWriteLock import scala.collection.mutable import scala.collection.parallel.CollectionConverters._ import scala.language.postfixOps @@ -52,6 +53,7 @@ class HeldTaskCompletion( : mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry ]]] = mutable.Map() + val rwlock = new ReentrantReadWriteLock() while (changed.values.toList.contains(true)) { val taskResultsPairs = toProcess @@ -67,7 +69,7 @@ class HeldTaskCompletion( changed = noneChanged taskResultsPairs.foreach { case (t, resultsForTask, newResults) => - addCompletedTasksToMainTable(newResults.toList, groupMap) + addCompletedTasksToMainTable(newResults.toList, groupMap, rwlock) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } @@ -127,7 +129,8 @@ class HeldTaskCompletion( results: List[(TaskFingerprint, TableEntry)], groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry - ]]] + ]]], + rwlock : ReentrantReadWriteLock ): Unit = { results.groupBy(_._1).par.foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) @@ -138,6 +141,7 @@ class HeldTaskCompletion( (head, last) } + rwlock.readLock().lock() val old = resultTable.getOrElse(fingerprint, Vector()).toList val oldGroups = groupMap.getOrElse( fingerprint, @@ -148,6 +152,7 @@ class HeldTaskCompletion( (head, last) } ) + rwlock.readLock().unlock() val mergedGroups = oldGroups ++ newGroups.par.map { case (k, v) => k -> { @@ -184,8 +189,11 @@ class HeldTaskCompletion( val mergedList = mergedGroups.map { case (_, list) => list.head }.toList - synchronized(resultTable.put(fingerprint, mergedList)) - synchronized(groupMap.update(fingerprint, mergedGroups)) + + rwlock.writeLock().lock() + resultTable.put(fingerprint, mergedList) + groupMap.update(fingerprint, mergedGroups) + rwlock.writeLock().unlock() } } From 8375599cbcec21bf1dc5ad83106b0bc55add8657 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 1 Feb 2023 18:14:45 +0530 Subject: [PATCH 27/31] Parallization in the outer loop --- .../queryengine/HeldTaskCompletion.scala | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index ff45778cf59b..9db2175ab2dc 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -68,12 +68,12 @@ class HeldTaskCompletion( .seq changed = noneChanged - taskResultsPairs.foreach { case (t, resultsForTask, newResults) => + taskResultsPairs.par.foreach { case (t, resultsForTask, newResults) => addCompletedTasksToMainTable(newResults.toList, groupMap, rwlock) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } - resultsProducedByTask += (t -> resultsForTask) + synchronized(resultsProducedByTask += (t -> resultsForTask)) } } deduplicateResultTable() @@ -130,7 +130,7 @@ class HeldTaskCompletion( groupMap: mutable.Map[TaskFingerprint, Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[ TableEntry ]]], - rwlock : ReentrantReadWriteLock + rwlock: ReentrantReadWriteLock ): Unit = { results.groupBy(_._1).par.foreach { case (fingerprint, resultList) => val entries = resultList.map(_._2) @@ -240,15 +240,6 @@ class HeldTaskCompletion( .toList } - private def getListFromGroups( - groups: Map[((CfgNode, List[Call], Boolean), (CfgNode, List[Call], Boolean)), List[TableEntry]] - ): List[TableEntry] = { - val mapped = groups.map { case (_, list) => - list.head - } - mapped.toList - } - private def computePriority(entry: TableEntry): BigInt = { var priority: BigInt = entry.path.length val multiplier: BigInt = 131072 // 2^17 From c4ce9274beb57b0e3e6e0b418463858327530c53 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 1 Feb 2023 18:32:36 +0530 Subject: [PATCH 28/31] Removed outer loop parallelism due to race issues --- .../dataflowengineoss/queryengine/HeldTaskCompletion.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 9db2175ab2dc..03f7b8ce87bb 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -68,12 +68,12 @@ class HeldTaskCompletion( .seq changed = noneChanged - taskResultsPairs.par.foreach { case (t, resultsForTask, newResults) => + taskResultsPairs.foreach { case (t, resultsForTask, newResults) => addCompletedTasksToMainTable(newResults.toList, groupMap, rwlock) newResults.foreach { case (fingerprint, _) => changed += fingerprint -> true } - synchronized(resultsProducedByTask += (t -> resultsForTask)) + resultsProducedByTask += (t -> resultsForTask) } } deduplicateResultTable() From febe497d378fd8e4845ca8a02d4d5cc3ba6c632a Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 1 Feb 2023 18:54:17 +0530 Subject: [PATCH 29/31] Parallel group computation --- .../dataflowengineoss/queryengine/HeldTaskCompletion.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index 03f7b8ce87bb..a60b8f76d39e 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -133,7 +133,7 @@ class HeldTaskCompletion( rwlock: ReentrantReadWriteLock ): Unit = { results.groupBy(_._1).par.foreach { case (fingerprint, resultList) => - val entries = resultList.map(_._2) + val entries = resultList.par.map(_._2) val newGroups = entries .groupBy { result => val head = result.path.headOption.map(x => (x.node, x.callSiteStack, x.isOutputArg)).get @@ -154,7 +154,7 @@ class HeldTaskCompletion( ) rwlock.readLock().unlock() - val mergedGroups = oldGroups ++ newGroups.par.map { case (k, v) => + val mergedGroups = oldGroups ++ newGroups.map { case (k, v) => k -> { val old = oldGroups.getOrElse(k, List()) val maxLen = if (old.length > 0) { From 39db6446bd5222d7ed87e54345b010f9468c8bc2 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Wed, 1 Feb 2023 19:03:13 +0530 Subject: [PATCH 30/31] Line formatting --- .../dataflowengineoss/queryengine/HeldTaskCompletion.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index a60b8f76d39e..e008610cb350 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -186,9 +186,7 @@ class HeldTaskCompletion( } } } - val mergedList = mergedGroups.map { case (_, list) => - list.head - }.toList + val mergedList = mergedGroups.map { case (_, list) => list.head}.toList rwlock.writeLock().lock() resultTable.put(fingerprint, mergedList) From 5ed522d8cef1ebc58912ceec72e54f38fadc6991 Mon Sep 17 00:00:00 2001 From: rahul-privado Date: Thu, 2 Feb 2023 18:47:23 +0530 Subject: [PATCH 31/31] Made priority computation more accurate --- .../dataflowengineoss/queryengine/HeldTaskCompletion.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala index e008610cb350..f6410ddc1064 100644 --- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala +++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/HeldTaskCompletion.scala @@ -245,7 +245,7 @@ class HeldTaskCompletion( entry.path.foreach(element => { priority = priority + element.callSiteStack.length * multiplier priority = priority + element.node.id() * multiplier * 64 - priority = priority + element.isOutputArg.hashCode() * multiplier * 64 + priority = priority + element.isOutputArg.hashCode() * multiplier * multiplier *64 priority = priority + element.visible.hashCode() * multiplier * 64 }) priority